summaryrefslogtreecommitdiffstats
path: root/sql/opt_histogram_json.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--sql/opt_histogram_json.h147
1 files changed, 147 insertions, 0 deletions
diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h
new file mode 100644
index 00000000..24846792
--- /dev/null
+++ b/sql/opt_histogram_json.h
@@ -0,0 +1,147 @@
+/*
+ Copyright (c) 2021, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#include "sql_statistics.h"
+
+/*
+ An equi-height histogram which stores real values for bucket bounds.
+
+ Handles @@histogram_type=JSON_HB
+
+ Histogram format in JSON:
+
+ {
+ // The next three are saved but not currently analyzed:
+ "target_histogram_size": nnn,
+ "collected_at": "(date and time)",
+ "collected_by": "(server version)",
+
+ "histogram_hb": [
+ { "start": "value", "size":nnn.nn, "ndv": nnn },
+ ...
+
+ // Optionally, start and/or end can be replaced with _hex variant
+ { "start_hex: "value", "size":nnn.nn, "ndv":nnn},
+
+ ...
+ { "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"},
+ ]
+ }
+
+ Histogram is a JSON object. It has some global properties and "histogram_hb"
+ member whose value is a JSON array of histogram buckets.
+
+ Each bucket is an object with these members:
+ "start" - the first value in the bucket.
+ "size" - fraction of table rows that is contained in the bucket.
+ "ndv" - Number of Distinct Values in the bucket.
+ "end" - Optionally, the last value in the bucket.
+
+ A bucket is a single-point bucket if it has ndv=1.
+
+ Most buckets have no "end" member: the bucket is assumed to contain all
+ values up to the "start" of the next bucket.
+
+ The exception is single-point buckets where last value is the same as the
+ first value.
+
+ start/end can be replaced with start_hex/end_hex. In _hex variant, the
+ constant is encoded in hex. This encoding is used to handle so called
+ "unassigned characters": some non-UTF8 charsets have byte combinations that
+ are not mapped to any UTF8 character.
+*/
+
+class Histogram_json_hb final : public Histogram_base
+{
+ size_t size; /* Number of elements in the histogram */
+
+ /* Collection-time only: collected histogram in the JSON form. */
+ std::string json_text;
+
+ struct Bucket
+ {
+ // The left endpoint in KeyTupleFormat. The endpoint is inclusive, this
+ // value is in this bucket.
+ std::string start_value;
+
+ // Cumulative fraction: The fraction of table rows that fall into this
+ // and preceding buckets.
+ double cum_fract;
+
+ // Number of distinct values in the bucket.
+ longlong ndv;
+ };
+
+ std::vector<Bucket> buckets;
+
+ std::string last_bucket_end_endp;
+
+public:
+ static constexpr const char* JSON_NAME="histogram_hb";
+
+ bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name,
+ Field *field, const char *hist_data,
+ size_t hist_data_len) override;
+
+ void serialize(Field *field) override;
+
+ Histogram_builder *create_builder(Field *col, uint col_len,
+ ha_rows rows) override;
+
+ // returns number of buckets in the histogram
+ uint get_width() override
+ {
+ return (uint)size;
+ }
+
+ Histogram_type get_type() override
+ {
+ return JSON_HB;
+ }
+
+ /*
+ @brief
+ This used to be the size of the histogram on disk, which was redundant
+ (one can check the size directly). Return the number of buckets instead.
+ */
+ uint get_size() override
+ {
+ return (uint)size;
+ }
+ void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
+ ulonglong size) override;
+
+ double point_selectivity(Field *field, key_range *endpoint,
+ double avg_sel) override;
+ double range_selectivity(Field *field, key_range *min_endp,
+ key_range *max_endp, double avg_sel) override;
+
+ void set_json_text(ulonglong sz, const char *json_text_arg,
+ size_t json_text_len)
+ {
+ size= (size_t) sz;
+ json_text.assign(json_text_arg, json_text_len);
+ }
+
+private:
+ int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size,
+ bool *assigned_last_end, const char **err);
+
+ double get_left_fract(int idx);
+ std::string& get_end_value(int idx);
+ int find_bucket(const Field *field, const uchar *lookup_val, int *cmp);
+};
+