summaryrefslogtreecommitdiffstats
path: root/storage/innobase/include/ut0stage.h
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include/ut0stage.h')
-rw-r--r--storage/innobase/include/ut0stage.h499
1 files changed, 499 insertions, 0 deletions
diff --git a/storage/innobase/include/ut0stage.h b/storage/innobase/include/ut0stage.h
new file mode 100644
index 00000000..17fbd91b
--- /dev/null
+++ b/storage/innobase/include/ut0stage.h
@@ -0,0 +1,499 @@
+/*****************************************************************************
+
+Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0stage.h
+Supplementary code to performance schema stage instrumentation.
+
+Created Nov 12, 2014 Vasil Dimov
+*******************************************************/
+
+#ifndef ut0stage_h
+#define ut0stage_h
+
+#include <algorithm>
+#include <math.h>
+
+#include "my_global.h" /* needed for headers from mysql/psi/ */
+
+#include "mysql/psi/mysql_stage.h" /* mysql_stage_inc_work_completed */
+#include "mysql/psi/psi.h" /* HAVE_PSI_STAGE_INTERFACE, PSI_stage_progress */
+
+#include "dict0mem.h" /* dict_index_t */
+#include "row0log.h" /* row_log_estimate_work() */
+#include "srv0srv.h" /* ut_stage_alter_t */
+
+#ifdef HAVE_PSI_STAGE_INTERFACE
+
+/** Class used to report ALTER TABLE progress via performance_schema.
+The only user of this class is the ALTER TABLE code and it calls the methods
+in the following order
+constructor
+begin_phase_read_pk()
+  multiple times:
+    n_pk_recs_inc() // once per record read
+    inc() // once per page read
+end_phase_read_pk()
+if any new indexes are being added, for each one:
+  begin_phase_sort()
+    multiple times:
+      inc() // once per record sorted
+  begin_phase_insert()
+    multiple times:
+      inc() // once per record inserted
+  begin_phase_log_index()
+    multiple times:
+      inc() // once per log-block applied
+begin_phase_log_table()
+  multiple times:
+    inc() // once per log-block applied
+begin_phase_end()
+destructor
+
+This class knows the specifics of each phase and tries to increment the
+progress in an even manner across the entire ALTER TABLE lifetime. */
+class ut_stage_alter_t {
+public:
+	/** Constructor.
+	Every data member gets a defined value here. The members that are
+	normally assigned by a later call (m_n_recs_per_page by
+	end_phase_read_pk(), m_n_sort_indexes by begin_phase_read_pk(),
+	m_sort_multi_factor by begin_phase_sort()) start with neutral
+	defaults, so that no method can read an indeterminate value if the
+	documented call order is not followed. The initializers are listed
+	in member declaration order.
+	@param[in]	pk	primary key of the old table */
+	explicit
+	ut_stage_alter_t(
+		const dict_index_t*	pk)
+		:
+		m_progress(NULL),
+		m_pk(pk),
+		m_n_pk_recs(0),
+		m_n_pk_pages(0),
+		/* 1.0 is the same fallback end_phase_read_pk() uses for an
+		empty tree; it keeps the division in inc() well defined. */
+		m_n_recs_per_page(1.0),
+		m_n_sort_indexes(0),
+		/* 1 is the smallest factor begin_phase_sort() ever stores. */
+		m_sort_multi_factor(1),
+		m_n_recs_processed(0),
+		m_cur_phase(NOT_STARTED)
+	{
+	}
+
+	/** Destructor. */
+	~ut_stage_alter_t();
+
+	/** Flag an ALTER TABLE start (read primary key phase).
+	@param[in]	n_sort_indexes	number of indexes that will be sorted
+	during ALTER TABLE, used for estimating the total work to be done */
+	void
+	begin_phase_read_pk(
+		ulint	n_sort_indexes);
+
+	/** Increment the number of records in PK (table) with 1.
+	This is used to get more accurate estimate about the number of
+	records per page which is needed because some phases work on
+	per-page basis while some work on per-record basis and we want
+	to get the progress as even as possible. */
+	void
+	n_pk_recs_inc();
+
+	/** Flag either one record or one page processed, depending on the
+	current phase.
+	@param[in]	inc_val	flag this many units processed at once */
+	void
+	inc(
+		ulint	inc_val = 1);
+
+	/** Flag the end of reading of the primary key.
+	Here we know the exact number of pages and records and calculate
+	the number of records per page and refresh the estimate. */
+	void
+	end_phase_read_pk();
+
+	/** Flag the beginning of the sort phase.
+	@param[in]	sort_multi_factor	since merge sort processes
+	one page more than once we only update the estimate once per this
+	many pages processed. */
+	void
+	begin_phase_sort(
+		double	sort_multi_factor);
+
+	/** Flag the beginning of the insert phase. */
+	void
+	begin_phase_insert();
+
+	/** Flag the beginning of the log index phase. */
+	void
+	begin_phase_log_index();
+
+	/** Flag the beginning of the log table phase. */
+	void
+	begin_phase_log_table();
+
+	/** Flag the beginning of the end phase. */
+	void
+	begin_phase_end();
+
+private:
+
+	/** Update the estimate of total work to be done. */
+	void
+	reestimate();
+
+	/** Change the current phase.
+	@param[in]	new_stage	pointer to the new stage to change to */
+	void
+	change_phase(
+		const PSI_stage_info*	new_stage);
+
+	/** Performance schema accounting object. NULL until
+	begin_phase_read_pk() has obtained one; most methods return
+	immediately while it is NULL. */
+	PSI_stage_progress*	m_progress;
+
+	/** Old table PK. Used for calculating the estimate. */
+	const dict_index_t*	m_pk;
+
+	/** Number of records in the primary key (table), including delete
+	marked records. */
+	ulint			m_n_pk_recs;
+
+	/** Number of leaf pages in the primary key. */
+	ulint			m_n_pk_pages;
+
+	/** Estimated number of records per page in the primary key. */
+	double			m_n_recs_per_page;
+
+	/** Number of indexes that are being added. */
+	ulint			m_n_sort_indexes;
+
+	/** During the sort phase, increment the counter once per this
+	many pages processed. This is because sort processes one page more
+	than once. */
+	ulint			m_sort_multi_factor;
+
+	/** Number of records processed during sort & insert phases. We
+	need to increment the counter only once per page, or once per
+	recs-per-page records. */
+	ulint			m_n_recs_processed;
+
+	/** Current phase. */
+	enum {
+		NOT_STARTED = 0,
+		READ_PK = 1,
+		SORT = 2,
+		INSERT = 3,
+		/* JAN: TODO: MySQL 5.7 vrs. MariaDB sql/log.h
+		LOG_INDEX = 5,
+		LOG_TABLE = 6, */
+		LOG_INNODB_INDEX = 5,
+		LOG_INNODB_TABLE = 6,
+		END = 7,
+	} m_cur_phase;
+};
+
+/** Destructor.
+If a progress object was obtained, report the work as fully done (the
+completed counter is set to the current estimate) and end the stage.
+Does nothing when there is no progress object. */
+inline
+ut_stage_alter_t::~ut_stage_alter_t()
+{
+	if (m_progress != NULL) {
+		/* Set completed = estimated before we quit. */
+		const ulonglong	estimated
+			= mysql_stage_get_work_estimated(m_progress);
+
+		mysql_stage_set_work_completed(m_progress, estimated);
+
+		mysql_end_stage();
+	}
+}
+
+/** Flag an ALTER TABLE start (read primary key phase).
+Remembers the number of indexes to sort, switches to the READ_PK phase,
+registers the read-PK stage, zeroes the completed counter and computes
+the initial work estimate.
+@param[in]	n_sort_indexes	number of indexes that will be sorted
+during ALTER TABLE, used for estimating the total work to be done */
+inline void
+ut_stage_alter_t::begin_phase_read_pk(ulint n_sort_indexes)
+{
+	m_cur_phase = READ_PK;
+	m_n_sort_indexes = n_sort_indexes;
+
+	m_progress = mysql_set_stage(
+		srv_stage_alter_table_read_pk_internal_sort.m_key);
+	mysql_stage_set_work_completed(m_progress, 0);
+
+	/* The phase and the index count are already in place, so the
+	estimate computed here reflects the read-PK phase. */
+	reestimate();
+}
+
+/** Count one more record in the PK (table).
+The record count is combined with the page count at the end of the
+read-PK phase to derive the number of records per page. That ratio is
+needed because some phases report per page and others per record, and
+the progress should advance as evenly as possible. */
+inline void
+ut_stage_alter_t::n_pk_recs_inc()
+{
+	++m_n_pk_recs;
+}
+
+/** Flag either one record or one page processed, depending on the
+current phase. Does nothing when there is no progress object.
+@param[in]	inc_val	flag this many units processed at once */
+inline
+void
+ut_stage_alter_t::inc(ulint inc_val)
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	ulint	sort_factor = 1;
+	bool	report = true;
+
+	switch (m_cur_phase) {
+	case NOT_STARTED:
+		ut_error;
+	case READ_PK:
+		m_n_pk_pages++;
+		ut_ad(inc_val == 1);
+		/* Called once per page read. Reading the PK also does
+		work proportional to the number of added indexes, so one
+		page counts as 1 + m_n_sort_indexes units. */
+		inc_val = 1 + m_n_sort_indexes;
+		break;
+	case SORT:
+		sort_factor = m_sort_multi_factor;
+		/* fall through */
+	case INSERT: {
+		/* Called once per record processed. Progress is only
+		reported every "records per page" (times the sort factor)
+		calls. That interval is naturally fractional, so to avoid
+		rounding skew the reporting calls are those numbered
+		round(k * N) for k = 1, 2, 3, ..., where N is the
+		(double) interval. */
+		const double	interval = m_n_recs_per_page
+			* static_cast<double>(sort_factor);
+
+		const double	seen
+			= static_cast<double>(m_n_recs_processed);
+
+		const ulint	k = static_cast<ulint>(
+			round(seen / interval));
+
+		const ulint	due = static_cast<ulint>(
+			round(static_cast<double>(k) * interval));
+
+		report = (due == m_n_recs_processed);
+
+		++m_n_recs_processed;
+
+		break;
+	}
+	/* JAN: TODO: MySQL 5.7
+	case LOG_INDEX:
+		break;
+	case LOG_TABLE:
+		break; */
+	case LOG_INNODB_INDEX:
+	case LOG_INNODB_TABLE:
+	case END:
+		/* inc_val is reported as given. */
+		break;
+	}
+
+	if (!report) {
+		return;
+	}
+
+	mysql_stage_inc_work_completed(m_progress, inc_val);
+	reestimate();
+}
+
+/** Flag the end of reading of the primary key.
+Refreshes the estimate, then derives the number of records per page
+from the exact record and page counts gathered while reading. The
+result is never below 1. */
+inline
+void
+ut_stage_alter_t::end_phase_read_pk()
+{
+	reestimate();
+
+	/* Default of 1 also covers an empty tree (0 pages), so that the
+	later division by m_n_recs_per_page is never a division by zero. */
+	double	recs_per_page = 1.0;
+
+	if (m_n_pk_pages != 0) {
+		const double	ratio = static_cast<double>(m_n_pk_recs)
+			/ static_cast<double>(m_n_pk_pages);
+
+		if (ratio > 1.0) {
+			recs_per_page = ratio;
+		}
+	}
+
+	m_n_recs_per_page = recs_per_page;
+}
+
+/** Flag the beginning of the sort phase.
+Stores the factor rounded to the nearest integer, but never less than
+1, and switches to the merge sort stage.
+@param[in]	sort_multi_factor	since merge sort processes
+one page more than once we only update the estimate once per this
+many pages processed. */
+inline void
+ut_stage_alter_t::begin_phase_sort(double sort_multi_factor)
+{
+	m_sort_multi_factor = (sort_multi_factor <= 1.0)
+		? 1
+		: static_cast<ulint>(round(sort_multi_factor));
+
+	change_phase(&srv_stage_alter_table_merge_sort);
+}
+
+/** Flag the beginning of the insert phase by switching to the
+srv_stage_alter_table_insert stage. */
+inline void
+ut_stage_alter_t::begin_phase_insert()
+{
+	change_phase(&srv_stage_alter_table_insert);
+}
+
+/** Flag the beginning of the log index phase by switching to the
+srv_stage_alter_table_log_index stage. */
+inline void
+ut_stage_alter_t::begin_phase_log_index()
+{
+	change_phase(&srv_stage_alter_table_log_index);
+}
+
+/** Flag the beginning of the log table phase by switching to the
+srv_stage_alter_table_log_table stage. */
+inline void
+ut_stage_alter_t::begin_phase_log_table()
+{
+	change_phase(&srv_stage_alter_table_log_table);
+}
+
+/** Flag the beginning of the end phase by switching to the
+srv_stage_alter_table_end stage. */
+inline void
+ut_stage_alter_t::begin_phase_end()
+{
+	change_phase(&srv_stage_alter_table_end);
+}
+
+/** Update the estimate of total work to be done.
+Does nothing when there is no progress object. The estimate is never
+set below the amount of work already reported as completed. */
+inline
+void
+ut_stage_alter_t::reestimate()
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	if (m_cur_phase == LOG_INNODB_TABLE) {
+		/* Log table phase: the estimate is the work done so far
+		plus the log size remaining. */
+		const ulonglong	done
+			= mysql_stage_get_work_completed(m_progress);
+
+		mysql_stage_set_work_estimated(
+			m_progress, done + row_log_estimate_work(m_pk));
+		return;
+	}
+
+	/* All other phases use a formula that does not depend on how
+	much work has been done so far. */
+
+	/* Number of pages in the PK: while the PK is still being read
+	only the approximate stat_n_leaf_pages is available, afterwards
+	the exact number gathered during the read is used. */
+	const ulint	n_pk_pages = (m_cur_phase == READ_PK)
+		? m_pk->stat_n_leaf_pages
+		: m_n_pk_pages;
+
+	/* Units of work attributed to each PK page. */
+	const ulint	work_per_page
+		= 1			/* read PK */
+		+ m_n_sort_indexes	/* row_merge_buf_sort() inside the
+					read PK per created index */
+		+ m_n_sort_indexes * 2;	/* sort & insert per created index */
+
+	ulonglong	estimate __attribute__((unused))
+		= n_pk_pages * work_per_page
+		+ row_log_estimate_work(m_pk);
+
+	/* Prevent estimate < completed */
+	const ulonglong	completed
+		= mysql_stage_get_work_completed(m_progress);
+
+	if (estimate < completed) {
+		estimate = completed;
+	}
+
+	mysql_stage_set_work_estimated(m_progress, estimate);
+}
+
+/** Change the current phase.
+Does nothing when there is no progress object. Otherwise records the
+phase that corresponds to new_stage, registers the new stage and carries
+the completed and estimated counters over to it. An unrecognised stage
+hits ut_error.
+@param[in]	new_stage	pointer to the new stage to change to */
+inline
+void
+ut_stage_alter_t::change_phase(
+	const PSI_stage_info*	new_stage)
+{
+	if (m_progress == NULL) {
+		return;
+	}
+
+	/* Snapshot the counters of the stage being left; they are
+	written into the progress object of the new stage below. */
+	const ulonglong	completed
+		= mysql_stage_get_work_completed(m_progress);
+	const ulonglong	estimated
+		= mysql_stage_get_work_estimated(m_progress);
+
+	/* Translate the stage descriptor into the internal phase. */
+	if (new_stage == &srv_stage_alter_table_read_pk_internal_sort) {
+		m_cur_phase = READ_PK;
+	} else if (new_stage == &srv_stage_alter_table_merge_sort) {
+		m_cur_phase = SORT;
+	} else if (new_stage == &srv_stage_alter_table_insert) {
+		m_cur_phase = INSERT;
+	/* JAN: TODO: MySQL 5.7 used LOG_INDEX and LOG_TABLE */
+	} else if (new_stage == &srv_stage_alter_table_log_index) {
+		m_cur_phase = LOG_INNODB_INDEX;
+	} else if (new_stage == &srv_stage_alter_table_log_table) {
+		m_cur_phase = LOG_INNODB_TABLE;
+	} else if (new_stage == &srv_stage_alter_table_end) {
+		m_cur_phase = END;
+	} else {
+		ut_error;
+	}
+
+	m_progress = mysql_set_stage(new_stage->m_key);
+
+	mysql_stage_set_work_completed(m_progress, completed);
+	mysql_stage_set_work_estimated(m_progress, estimated);
+}
+#else /* HAVE_PSI_STAGE_INTERFACE */
+
+/** No-op variant of ut_stage_alter_t, compiled when
+HAVE_PSI_STAGE_INTERFACE is not defined. It offers the same public
+methods as the instrumented class, all with empty bodies, so calling
+code does not need its own conditional compilation. */
+class ut_stage_alter_t {
+public:
+	/** Constructor; the primary key argument is ignored. */
+	explicit
+	ut_stage_alter_t(
+		const dict_index_t*)
+	{
+	}
+
+	/** Does nothing. */
+	void
+	begin_phase_read_pk(
+		ulint)
+	{
+	}
+
+	/** Does nothing. */
+	void
+	n_pk_recs_inc()
+	{
+	}
+
+	/** Does nothing. */
+	void
+	inc()
+	{
+	}
+
+	/** Does nothing; the increment argument is ignored. */
+	void
+	inc(
+		ulint)
+	{
+	}
+
+	/** Does nothing. */
+	void
+	end_phase_read_pk()
+	{
+	}
+
+	/** Does nothing; the factor argument is ignored. */
+	void
+	begin_phase_sort(
+		double)
+	{
+	}
+
+	/** Does nothing. */
+	void
+	begin_phase_insert()
+	{
+	}
+
+	/** Does nothing. */
+	void
+	begin_phase_log_index()
+	{
+	}
+
+	/** Does nothing. */
+	void
+	begin_phase_log_table()
+	{
+	}
+
+	/** Does nothing. */
+	void
+	begin_phase_end()
+	{
+	}
+};
+
+#endif /* HAVE_PSI_STAGE_INTERFACE */
+
+#endif /* ut0stage_h */