summaryrefslogtreecommitdiffstats
path: root/sql/filesort_utils.h
diff options
context:
space:
mode:
Diffstat (limited to 'sql/filesort_utils.h')
-rw-r--r--sql/filesort_utils.h286
1 files changed, 286 insertions, 0 deletions
diff --git a/sql/filesort_utils.h b/sql/filesort_utils.h
new file mode 100644
index 00000000..1962f149
--- /dev/null
+++ b/sql/filesort_utils.h
@@ -0,0 +1,286 @@
+/* Copyright (c) 2010, 2012 Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifndef FILESORT_UTILS_INCLUDED
+#define FILESORT_UTILS_INCLUDED
+
+#include "my_base.h"
+#include "sql_array.h"
+
+class Sort_param;
+/*
+ Calculate cost of merge sort
+
+ @param num_rows Total number of rows.
+ @param num_keys_per_buffer Number of keys per buffer.
+ @param elem_size Size of each element.
+
+ Calculates cost of merge sort by simulating call to merge_many_buff().
+
+ @retval
+ Computed cost of merge sort in disk seeks.
+
+ @note
+ Declared here in order to be able to unit test it,
+ since library dependencies have not been sorted out yet.
+
+ See also comments get_merge_many_buffs_cost().
+*/
+
+double get_merge_many_buffs_cost_fast(ha_rows num_rows,
+ ha_rows num_keys_per_buffer,
+ uint elem_size);
+
+
+/**
+ A wrapper class around the buffer used by filesort().
+ The sort buffer is a contiguous chunk of memory,
+ containing both records to be sorted, and pointers to said records:
+
+ <start of buffer | still unused | end of buffer>
+ |rec 0|record 1 |rec 2| ............ |ptr to rec2|ptr to rec1|ptr to rec0|
+
+ Records will be inserted "left-to-right". Records are not necessarily
+ fixed-size, they can be packed and stored without any "gaps".
+
+ Record pointers will be inserted "right-to-left", as a side-effect
+ of inserting the actual records.
+
+ We wrap the buffer in order to be able to do lazy initialization of the
+ pointers: the buffer is often much larger than what we actually need.
+
+ With this allocation scheme, and lazy initialization of the pointers,
+ we are able to pack variable-sized records in the buffer,
+ and thus possibly have space for more records than we initially estimated.
+
+ The buffer must be kept available for multiple executions of the
+ same sort operation, so we have explicit allocate and free functions,
+ rather than doing alloc/free in CTOR/DTOR.
+*/
+
+class Filesort_buffer
+{
+public:
+ Filesort_buffer() :
+ m_next_rec_ptr(NULL), m_rawmem(NULL), m_record_pointers(NULL),
+ m_sort_keys(NULL),
+ m_num_records(0), m_record_length(0),
+ m_sort_length(0),
+ m_size_in_bytes(0), m_idx(0)
+ {}
+
+ /** Sort me... */
+ void sort_buffer(const Sort_param *param, uint count);
+
+ /**
+ Reverses the record pointer array, to avoid recording new results for
+ non-deterministic mtr tests.
+ */
+ void reverse_record_pointers()
+ {
+ if (m_idx < 2) // There is nothing to swap.
+ return;
+ uchar **keys= get_sort_keys();
+ const longlong count= m_idx - 1;
+ for (longlong ix= 0; ix <= count/2; ++ix)
+ {
+ uchar *tmp= keys[count - ix];
+ keys[count - ix] = keys[ix];
+ keys[ix]= tmp;
+ }
+ }
+
+ /**
+ Initializes all the record pointers.
+ */
+ void init_record_pointers()
+ {
+ init_next_record_pointer();
+ while (m_idx < m_num_records)
+ (void) get_next_record_pointer();
+ reverse_record_pointers();
+ }
+
+ /**
+ Prepares the buffer for the next batch of records to process.
+ */
+ void init_next_record_pointer()
+ {
+ m_idx= 0;
+ m_next_rec_ptr= m_rawmem;
+ m_sort_keys= NULL;
+ }
+
+ /**
+ @returns the number of bytes currently in use for data.
+ */
+ size_t space_used_for_data() const
+ {
+ return m_next_rec_ptr ? m_next_rec_ptr - m_rawmem : 0;
+ }
+
+ /**
+ @returns the number of bytes left in the buffer.
+ */
+ size_t spaceleft() const
+ {
+ DBUG_ASSERT(m_next_rec_ptr >= m_rawmem);
+ const size_t spaceused=
+ (m_next_rec_ptr - m_rawmem) +
+ (static_cast<size_t>(m_idx) * sizeof(uchar*));
+ return m_size_in_bytes - spaceused;
+ }
+
+ /**
+ Is the buffer full?
+ */
+ bool isfull() const
+ {
+ if (m_idx < m_num_records)
+ return false;
+ return spaceleft() < (m_record_length + sizeof(uchar*));
+ }
+
+ /**
+ Where should the next record be stored?
+ */
+ uchar *get_next_record_pointer()
+ {
+ uchar *retval= m_next_rec_ptr;
+ // Save the return value in the record pointer array.
+ m_record_pointers[-m_idx]= m_next_rec_ptr;
+ // Prepare for the subsequent request.
+ m_idx++;
+ m_next_rec_ptr+= m_record_length;
+ return retval;
+ }
+
+ /**
+ Adjusts for actual record length. get_next_record_pointer() above was
+ pessimistic, and assumed that the record could not be packed.
+ */
+ void adjust_next_record_pointer(uint val)
+ {
+ m_next_rec_ptr-= (m_record_length - val);
+ }
+
+ /// Returns total size: pointer array + record buffers.
+ size_t sort_buffer_size() const
+ {
+ return m_size_in_bytes;
+ }
+
+ bool is_allocated() const
+ {
+ return m_rawmem;
+ }
+
+ /**
+ Allocates the buffer, but does *not* initialize pointers.
+ Total size = (num_records * record_length) + (num_records * sizeof(pointer))
+ space for records space for pointer to records
+ Caller is responsible for raising an error if allocation fails.
+
+ @param num_records Number of records.
+ @param record_length (maximum) size of each record.
+ @returns Pointer to allocated area, or NULL in case of out-of-memory.
+ */
+ uchar *alloc_sort_buffer(uint num_records, uint record_length);
+
+ /// Frees the buffer.
+ void free_sort_buffer();
+
+ void reset()
+ {
+ m_rawmem= NULL;
+ }
+ /**
+ Used to access the "right-to-left" array of record pointers as an ordinary
+ "left-to-right" array, so that we can pass it directly on to std::sort().
+ */
+ uchar **get_sort_keys()
+ {
+ if (m_idx == 0)
+ return NULL;
+ return &m_record_pointers[1 - m_idx];
+ }
+
+ /**
+ Gets sorted record number ix. @see get_sort_keys()
+ Only valid after buffer has been sorted!
+ */
+ uchar *get_sorted_record(uint ix)
+ {
+ return m_sort_keys[ix];
+ }
+
+ /**
+ @returns The entire buffer, as a character array.
+ This is for reusing the memory for merge buffers.
+ */
+ Bounds_checked_array<uchar> get_raw_buf()
+ {
+ return Bounds_checked_array<uchar>(m_rawmem, m_size_in_bytes);
+ }
+
+ /**
+ We need an assignment operator, see filesort().
+ This happens to have the same semantics as the one that would be
+ generated by the compiler. We still implement it here, to show shallow
+ assignment explicitly: we have two objects sharing the same array.
+ */
+ Filesort_buffer &operator=(const Filesort_buffer &rhs)
+ {
+ m_next_rec_ptr= rhs.m_next_rec_ptr;
+ m_rawmem= rhs.m_rawmem;
+ m_record_pointers= rhs.m_record_pointers;
+ m_sort_keys= rhs.m_sort_keys;
+ m_num_records= rhs.m_num_records;
+ m_record_length= rhs.m_record_length;
+ m_sort_length= rhs.m_sort_length;
+ m_size_in_bytes= rhs.m_size_in_bytes;
+ m_idx= rhs.m_idx;
+ return *this;
+ }
+
+ uint get_sort_length() const { return m_sort_length; }
+ void set_sort_length(uint val) { m_sort_length= val; }
+
+private:
+ uchar *m_next_rec_ptr; /// The next record will be inserted here.
+ uchar *m_rawmem; /// The raw memory buffer.
+ uchar **m_record_pointers; /// The "right-to-left" array of record pointers.
+ uchar **m_sort_keys; /// Caches the value of get_sort_keys()
+ uint m_num_records; /// Saved value from alloc_sort_buffer()
+ uint m_record_length; /// Saved value from alloc_sort_buffer()
+ uint m_sort_length; /// The length of the sort key.
+ size_t m_size_in_bytes; /// Size of raw buffer, in bytes.
+
+ /**
+ This is the index in the "right-to-left" array of the next record to
+ be inserted into the buffer. It is signed, because we use it in signed
+ expressions like:
+ m_record_pointers[-m_idx];
+ It is longlong rather than int, to ensure that it covers UINT_MAX32
+ without any casting/warning.
+ */
+ longlong m_idx;
+};
+
+int compare_packed_sort_keys(void *sort_keys, unsigned char **a,
+ unsigned char **b);
+qsort2_cmp get_packed_keys_compare_ptr();
+
+#endif // FILESORT_UTILS_INCLUDED