summaryrefslogtreecommitdiffstats
path: root/sql/uniques.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--sql/uniques.h110
1 files changed, 110 insertions, 0 deletions
diff --git a/sql/uniques.h b/sql/uniques.h
new file mode 100644
index 00000000..7e12a391
--- /dev/null
+++ b/sql/uniques.h
@@ -0,0 +1,110 @@
+/* Copyright (c) 2016 MariaDB corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#ifndef UNIQUE_INCLUDED
+#define UNIQUE_INCLUDED
+
+#include "filesort.h"
+
+/*
+ Unique -- class for unique (removing of duplicates).
+ Puts all values to the TREE. If the tree becomes too big,
+ it's dumped to the file. User can request sorted values, or
+ just iterate through them. In the last case tree merging is performed in
+ memory simultaneously with iteration, so it should be ~2-3x faster.
+ */
+
+class Unique :public Sql_alloc
+{
+ DYNAMIC_ARRAY file_ptrs;
+ ulong max_elements; /* Total number of elements that will be stored in-memory */
+ size_t max_in_memory_size;
+ IO_CACHE file;
+ TREE tree;
+ /* Number of elements filtered out due to min_dupl_count when storing results
+ to table. See Unique::get */
+ ulong filtered_out_elems;
+ uint size;
+
+ uint full_size; /* Size of element + space needed to store the number of
+ duplicates found for the element. */
+ uint min_dupl_count; /* Minimum number of occurences of element required for
+ it to be written to record_pointers.
+ always 0 for unions, > 0 for intersections */
+ bool with_counters;
+
+ bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
+ bool flush();
+
+public:
+ ulong elements;
+ SORT_INFO sort;
+ Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
+ uint size_arg, size_t max_in_memory_size_arg,
+ uint min_dupl_count_arg= 0);
+ ~Unique();
+ ulong elements_in_tree() { return tree.elements_in_tree; }
+ inline bool unique_add(void *ptr)
+ {
+ DBUG_ENTER("unique_add");
+ DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
+ if (!(tree.flag & TREE_ONLY_DUPS) &&
+ tree.elements_in_tree >= max_elements && flush())
+ DBUG_RETURN(1);
+ DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
+ }
+
+ bool is_in_memory() { return (my_b_tell(&file) == 0); }
+ void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
+
+ bool get(TABLE *table);
+
+ /* Cost of searching for an element in the tree */
+ inline static double get_search_cost(ulonglong tree_elems,
+ double compare_factor)
+ {
+ return log((double) tree_elems) / (compare_factor * M_LN2);
+ }
+
+ static double get_use_cost(uint *buffer, size_t nkeys, uint key_size,
+ size_t max_in_memory_size, double compare_factor,
+ bool intersect_fl, bool *in_memory);
+ inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
+ size_t max_in_memory_size)
+ {
+ size_t max_elems_in_tree=
+ max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
+
+ if (max_elems_in_tree == 0)
+ max_elems_in_tree= 1;
+ return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
+ }
+
+ void reset();
+ bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
+
+ uint get_size() const { return size; }
+ size_t get_max_in_memory_size() const { return max_in_memory_size; }
+
+ friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
+ friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
+
+ friend int unique_write_to_file_with_count(uchar* key, element_count count,
+ Unique *unique);
+ friend int unique_intersect_write_to_ptrs(uchar* key, element_count count,
+ Unique *unique);
+};
+
+#endif /* UNIQUE_INCLUDED */