summaryrefslogtreecommitdiffstats
path: root/sql/sql_analyze_stmt.h
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_analyze_stmt.h')
-rw-r--r--sql/sql_analyze_stmt.h479
1 files changed, 479 insertions, 0 deletions
diff --git a/sql/sql_analyze_stmt.h b/sql/sql_analyze_stmt.h
new file mode 100644
index 00000000..d1faa58a
--- /dev/null
+++ b/sql/sql_analyze_stmt.h
@@ -0,0 +1,479 @@
+/*
+ Copyright (c) 2015, 2020, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+
+== ANALYZE-stmt classes ==
+
+This file contains classes for supporting "ANALYZE statement" feature. These are
+a set of data structures that can be used to store the data about how the
+statement executed.
+
+There are two kinds of data collection:
+
+1. Various counters. We assume that incrementing counters has very low
+overhead. Because of that, execution code increments counters unconditionally
+(even when not running "ANALYZE $statement" commands. You run regular SELECT/
+UPDATE/DELETE/etc and the counters are incremented).
+
+As a free bonus, this lets us print detailed information into the slow query
+log, should the query be slow.
+
+2. Timing data. Measuring the time it took to run parts of query has noticeable
+overhead. Because of that, we measure the time only when running "ANALYZE
+$stmt").
+*/
+
+/* fake microseconds as cycles if cycles isn't available */
+
+static inline double timer_tracker_frequency()
+{
+#if (MY_TIMER_ROUTINE_CYCLES)
+ return static_cast<double>(sys_timer_info.cycles.frequency);
+#else
+ return static_cast<double>(sys_timer_info.microseconds.frequency);
+#endif
+}
+
+
+class Gap_time_tracker;
+void attach_gap_time_tracker(THD *thd, Gap_time_tracker *gap_tracker, ulonglong timeval);
+void process_gap_time_tracker(THD *thd, ulonglong timeval);
+
+/*
+ A class for tracking time it takes to do a certain action
+*/
+class Exec_time_tracker
+{
+protected:
+ ulonglong count;
+ ulonglong cycles;
+ ulonglong last_start;
+
+ ulonglong measure() const
+ {
+#if (MY_TIMER_ROUTINE_CYCLES)
+ return my_timer_cycles();
+#else
+ return my_timer_microseconds();
+#endif
+ }
+
+ void cycles_stop_tracking(THD *thd)
+ {
+ ulonglong end= measure();
+ cycles += end - last_start;
+
+ process_gap_time_tracker(thd, end);
+ if (my_gap_tracker)
+ attach_gap_time_tracker(thd, my_gap_tracker, end);
+ }
+
+ /*
+ The time spent after stop_tracking() call on this object and any
+ subsequent time tracking call will be billed to this tracker.
+ */
+ Gap_time_tracker *my_gap_tracker;
+public:
+ Exec_time_tracker() : count(0), cycles(0), my_gap_tracker(NULL) {}
+
+ void set_gap_tracker(Gap_time_tracker *gap_tracker)
+ {
+ my_gap_tracker= gap_tracker;
+ }
+
+ // interface for collecting time
+ void start_tracking(THD *thd)
+ {
+ last_start= measure();
+ process_gap_time_tracker(thd, last_start);
+ }
+
+ void stop_tracking(THD *thd)
+ {
+ count++;
+ cycles_stop_tracking(thd);
+ }
+
+ // interface for getting the time
+ ulonglong get_loops() const { return count; }
+
+ inline double cycles_to_ms(ulonglong cycles_arg) const
+ {
+ // convert 'cycles' to milliseconds.
+ return 1000.0 * static_cast<double>(cycles_arg) /
+ timer_tracker_frequency();
+ }
+
+ double get_time_ms() const
+ {
+ return cycles_to_ms(cycles);
+ }
+ ulonglong get_cycles() const
+ {
+ return cycles;
+ }
+
+ bool has_timed_statistics() const { return cycles > 0; }
+};
+
+
+/*
+ Tracker for time spent between the calls to Exec_time_tracker's {start|
+ stop}_tracking().
+
+ @seealso Gap_time_tracker_data in sql_class.h
+*/
+class Gap_time_tracker
+{
+ ulonglong cycles;
+public:
+ Gap_time_tracker() : cycles(0) {}
+
+ void log_time(ulonglong start, ulonglong end) {
+ cycles += end - start;
+ }
+
+ double get_time_ms() const
+ {
+ // convert 'cycles' to milliseconds.
+ return 1000.0 * static_cast<double>(cycles) / timer_tracker_frequency();
+ }
+};
+
+
+/*
+ A class for counting certain actions (in all queries), and optionally
+ collecting the timings (in ANALYZE queries).
+*/
+
+class Time_and_counter_tracker: public Exec_time_tracker
+{
+public:
+ const bool timed;
+
+ Time_and_counter_tracker(bool timed_arg) : timed(timed_arg)
+ {}
+
+ /* Loops are counted in both ANALYZE and regular queries, as this is cheap */
+ void incr_loops() { count++; }
+
+ /*
+ Unlike Exec_time_tracker::stop_tracking, we don't increase loops.
+ */
+ void stop_tracking(THD *thd)
+ {
+ cycles_stop_tracking(thd);
+ }
+};
+
+#define ANALYZE_START_TRACKING(thd, tracker) \
+ { \
+ (tracker)->incr_loops(); \
+ if (unlikely((tracker)->timed)) \
+ { (tracker)->start_tracking(thd); } \
+ }
+
+#define ANALYZE_STOP_TRACKING(thd, tracker) \
+ if (unlikely((tracker)->timed)) \
+ { (tracker)->stop_tracking(thd); }
+
+
+/*
+ Just a counter to increment one value. Wrapped in a class to be uniform
+ with other counters used by ANALYZE.
+*/
+
+class Counter_tracker
+{
+public:
+ Counter_tracker() : r_scans(0) {}
+ ha_rows r_scans;
+
+ inline void on_scan_init() { r_scans++; }
+
+ bool has_scans() const { return (r_scans != 0); }
+ ha_rows get_loops() const { return r_scans; }
+};
+
+
+/*
+ A class for collecting read statistics.
+
+ The idea is that we run several scans. Each scans gets rows, and then filters
+ some of them out. We count scans, rows, and rows left after filtering.
+
+ (note: at the moment, the class is not actually tied to a physical table.
+ It can be used to track reading from files, buffers, etc).
+*/
+
+class Table_access_tracker
+{
+public:
+ Table_access_tracker() : r_scans(0), r_rows(0), r_rows_after_where(0)
+ {}
+
+ ha_rows r_scans; /* how many scans were ran on this join_tab */
+ ha_rows r_rows; /* How many rows we've got after that */
+ ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
+
+ double get_avg_rows() const
+ {
+ return r_scans
+ ? static_cast<double>(r_rows) / static_cast<double>(r_scans)
+ : 0;
+ }
+
+ double get_filtered_after_where() const
+ {
+ return r_rows > 0
+ ? static_cast<double>(r_rows_after_where) /
+ static_cast<double>(r_rows)
+ : 1.0;
+ }
+
+ inline void on_scan_init() { r_scans++; }
+ inline void on_record_read() { r_rows++; }
+ inline void on_record_after_where() { r_rows_after_where++; }
+
+ bool has_scans() const { return (r_scans != 0); }
+ ha_rows get_loops() const { return r_scans; }
+};
+
+
+class Json_writer;
+
+/*
+ This stores the data about how filesort executed.
+
+ A few things from here (e.g. r_used_pq, r_limit) belong to the query plan,
+ however, these parameters are calculated right during the execution so we
+ can't easily put them into the query plan.
+
+ The class is designed to handle multiple invocations of filesort().
+*/
+
+class Filesort_tracker : public Sql_alloc
+{
+public:
+ Filesort_tracker(bool do_timing) :
+ time_tracker(do_timing), r_limit(0), r_used_pq(0),
+ r_examined_rows(0), r_sorted_rows(0), r_output_rows(0),
+ sort_passes(0),
+ sort_buffer_size(0),
+ r_using_addons(false),
+ r_packed_addon_fields(false),
+ r_sort_keys_packed(false)
+ {}
+
+ /* Functions that filesort uses to report various things about its execution */
+
+ inline void report_use(THD *thd, ha_rows r_limit_arg)
+ {
+ if (!time_tracker.get_loops())
+ r_limit= r_limit_arg;
+ else
+ r_limit= (r_limit != r_limit_arg)? 0: r_limit_arg;
+
+ ANALYZE_START_TRACKING(thd, &time_tracker);
+ }
+ inline void incr_pq_used() { r_used_pq++; }
+
+ inline void report_row_numbers(ha_rows examined_rows,
+ ha_rows sorted_rows,
+ ha_rows returned_rows)
+ {
+ r_examined_rows += examined_rows;
+ r_sorted_rows += sorted_rows;
+ r_output_rows += returned_rows;
+ }
+
+ inline void report_merge_passes_at_start(ulong passes)
+ {
+ sort_passes -= passes;
+ }
+ inline void report_merge_passes_at_end(THD *thd, ulong passes)
+ {
+ ANALYZE_STOP_TRACKING(thd, &time_tracker);
+ sort_passes += passes;
+ }
+
+ inline void report_sort_buffer_size(size_t bufsize)
+ {
+ if (sort_buffer_size)
+ sort_buffer_size= ulonglong(-1); // multiple buffers of different sizes
+ else
+ sort_buffer_size= bufsize;
+ }
+
+ inline void report_addon_fields_format(bool addons_packed)
+ {
+ r_using_addons= true;
+ r_packed_addon_fields= addons_packed;
+ }
+ inline void report_sort_keys_format(bool sort_keys_packed)
+ {
+ r_sort_keys_packed= sort_keys_packed;
+ }
+
+ void get_data_format(String *str);
+
+ /* Functions to get the statistics */
+ void print_json_members(Json_writer *writer);
+
+ ulonglong get_r_loops() const { return time_tracker.get_loops(); }
+ double get_avg_examined_rows() const
+ {
+ return static_cast<double>(r_examined_rows) /
+ static_cast<double>(get_r_loops());
+ }
+ double get_avg_returned_rows() const
+ {
+ return static_cast<double>(r_output_rows) /
+ static_cast<double>(get_r_loops());
+ }
+ double get_r_filtered() const
+ {
+ return r_examined_rows > 0
+ ? static_cast<double>(r_sorted_rows) /
+ static_cast<double>(r_examined_rows)
+ : 1.0;
+ }
+private:
+ Time_and_counter_tracker time_tracker;
+
+ //ulonglong r_loops; /* How many times filesort was invoked */
+ /*
+ LIMIT is typically a constant. There is never "LIMIT 0".
+ HA_POS_ERROR means we never had a limit
+ 0 means different values of LIMIT were used in
+ different filesort invocations
+ other value means the same LIMIT value was used every time.
+ */
+ ulonglong r_limit;
+ ulonglong r_used_pq; /* How many times PQ was used */
+
+ /* How many rows were examined (before checking the select->cond) */
+ ulonglong r_examined_rows;
+
+ /*
+ How many rows were put into sorting (this is examined_rows minus rows that
+ didn't pass the WHERE condition)
+ */
+ ulonglong r_sorted_rows;
+
+ /*
+ How many rows were returned. This is equal to r_sorted_rows, unless there
+ was a LIMIT N clause in which case filesort would not have returned more
+ than N rows.
+ */
+ ulonglong r_output_rows;
+
+ /* How many sorts in total (divide by r_count to get the average) */
+ ulonglong sort_passes;
+
+ /*
+ 0 - means not used (or not known
+ (ulonglong)-1 - multiple
+ other - value
+ */
+ ulonglong sort_buffer_size;
+ bool r_using_addons;
+ bool r_packed_addon_fields;
+ bool r_sort_keys_packed;
+};
+
+
+/**
+ A class to collect data about how rowid filter is executed.
+
+ It stores information about how rowid filter container is filled,
+ containers size and observed selectivity.
+
+ The observed selectivity is calculated in this way.
+ Some elements elem_set are checked if they belong to container.
+ Observed selectivity is calculated as the count of elem_set
+ elements that belong to container devided by all elem_set elements.
+*/
+
+class Rowid_filter_tracker : public Sql_alloc
+{
+private:
+ /* A member to track the time to fill the rowid filter */
+ Time_and_counter_tracker time_tracker;
+
+ /* Size of the rowid filter container buffer */
+ size_t container_buff_size;
+
+ /* Count of elements that were used to fill the rowid filter container */
+ uint container_elements;
+
+ /* Elements counts used for observed selectivity calculation */
+ uint n_checks;
+ uint n_positive_checks;
+public:
+ Rowid_filter_tracker(bool do_timing) :
+ time_tracker(do_timing), container_buff_size(0),
+ container_elements(0), n_checks(0), n_positive_checks(0)
+ {}
+
+ inline void start_tracking(THD *thd)
+ {
+ ANALYZE_START_TRACKING(thd, &time_tracker);
+ }
+
+ inline void stop_tracking(THD *thd)
+ {
+ ANALYZE_STOP_TRACKING(thd, &time_tracker);
+ }
+
+ /* Save container buffer size in bytes */
+ inline void report_container_buff_size(uint elem_size)
+ {
+ container_buff_size= container_elements * elem_size / 8;
+ }
+
+ Time_and_counter_tracker *get_time_tracker()
+ {
+ return &time_tracker;
+ }
+
+ double get_time_fill_container_ms() const
+ {
+ return time_tracker.get_time_ms();
+ }
+
+ void increment_checked_elements_count(bool was_checked)
+ {
+ n_checks++;
+ if (was_checked)
+ n_positive_checks++;
+ }
+
+ inline void increment_container_elements_count() { container_elements++; }
+
+ uint get_container_elements() const { return container_elements; }
+
+ uint get_container_lookups() { return n_checks; }
+
+ double get_r_selectivity_pct() const
+ {
+ return n_checks ? static_cast<double>(n_positive_checks) /
+ static_cast<double>(n_checks) : 0;
+ }
+
+ size_t get_container_buff_size() const { return container_buff_size; }
+};