summary refs log tree commit diff stats
path: root/storage/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include')
-rw-r--r-- storage/innobase/include/btr0btr.h 17
-rw-r--r-- storage/innobase/include/btr0pcur.h 8
-rw-r--r-- storage/innobase/include/buf0block_hint.h 76
-rw-r--r-- storage/innobase/include/buf0buf.h 50
-rw-r--r-- storage/innobase/include/buf0buf.inl 14
-rw-r--r-- storage/innobase/include/buf0flu.h 10
-rw-r--r-- storage/innobase/include/cache.h 33
-rw-r--r-- storage/innobase/include/data0data.h 13
-rw-r--r-- storage/innobase/include/data0data.inl 30
-rw-r--r-- storage/innobase/include/db0err.h 12
-rw-r--r-- storage/innobase/include/dict0dict.h 67
-rw-r--r-- storage/innobase/include/dict0mem.h 5
-rw-r--r-- storage/innobase/include/dict0mem.inl 1
-rw-r--r-- storage/innobase/include/dyn0buf.h 9
-rw-r--r-- storage/innobase/include/dyn0types.h 3
-rw-r--r-- storage/innobase/include/fil0fil.h 60
-rw-r--r-- storage/innobase/include/fsp0fsp.h 20
-rw-r--r-- storage/innobase/include/fts0fts.h 6
-rw-r--r-- storage/innobase/include/fts0priv.h 21
-rw-r--r-- storage/innobase/include/fts0priv.inl 44
-rw-r--r-- storage/innobase/include/fts0types.h 38
-rw-r--r-- storage/innobase/include/fts0types.inl 47
-rw-r--r-- storage/innobase/include/fut0lst.h 50
-rw-r--r-- storage/innobase/include/gis0type.h 6
-rw-r--r-- storage/innobase/include/lock0lock.h 74
-rw-r--r-- storage/innobase/include/log0crypt.h 3
-rw-r--r-- storage/innobase/include/log0log.h 171
-rw-r--r-- storage/innobase/include/mtr0mtr.h 28
-rw-r--r-- storage/innobase/include/os0file.h 89
-rw-r--r-- storage/innobase/include/os0file.inl 8
-rw-r--r-- storage/innobase/include/row0merge.h 11
-rw-r--r-- storage/innobase/include/row0row.h 6
-rw-r--r-- storage/innobase/include/row0sel.h 4
-rw-r--r-- storage/innobase/include/srv0mon.h 2
-rw-r--r-- storage/innobase/include/srv0srv.h 4
-rw-r--r-- storage/innobase/include/srw_lock.h 50
-rw-r--r-- storage/innobase/include/trx0purge.h 181
-rw-r--r-- storage/innobase/include/trx0rseg.h 12
-rw-r--r-- storage/innobase/include/trx0trx.h 31
-rw-r--r-- storage/innobase/include/trx0undo.inl 3
-rw-r--r-- storage/innobase/include/ut0new.h 3
-rw-r--r-- storage/innobase/include/ut0ut.h 14
-rw-r--r-- storage/innobase/include/ut0vec.h 9
-rw-r--r-- storage/innobase/include/ut0vec.inl 13
44 files changed, 581 insertions, 775 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index b42c543c..83bdaa97 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -189,13 +189,16 @@ btr_read_autoinc(dict_index_t* index)
/** Read the last used AUTO_INCREMENT value from PAGE_ROOT_AUTO_INC,
or fall back to MAX(auto_increment_column).
-@param[in] table table containing an AUTO_INCREMENT column
-@param[in] col_no index of the AUTO_INCREMENT column
-@return the AUTO_INCREMENT value
-@retval 0 on error or if no AUTO_INCREMENT value was used yet */
-ib_uint64_t
-btr_read_autoinc_with_fallback(const dict_table_t* table, unsigned col_no)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+@param table table containing an AUTO_INCREMENT column
+@param col_no index of the AUTO_INCREMENT column
+@param mysql_version TABLE_SHARE::mysql_version
+@param max the maximum value of the AUTO_INCREMENT column
+@return the AUTO_INCREMENT value
+@retval 0 on error or if no AUTO_INCREMENT value was used yet */
+uint64_t btr_read_autoinc_with_fallback(const dict_table_t *table,
+ unsigned col_no, ulong mysql_version,
+ uint64_t max)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Write the next available AUTO_INCREMENT value to PAGE_ROOT_AUTO_INC.
@param[in,out] index clustered index
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index c66a3bfa..5f84328d 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -28,7 +28,6 @@ Created 2/23/1996 Heikki Tuuri
#include "dict0dict.h"
#include "btr0cur.h"
-#include "buf0block_hint.h"
#include "btr0btr.h"
#include "gis0rtree.h"
@@ -332,8 +331,8 @@ struct btr_pcur_t
/** BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on
whether cursor was on, before, or after the old_rec record */
btr_pcur_pos_t rel_pos= btr_pcur_pos_t(0);
- /** buffer block when the position was stored */
- buf::Block_hint block_when_stored;
+ /** the page identifier of old_rec */
+ page_id_t old_page_id{0,0};
/** the modify clock value of the buffer block when the cursor position
was stored */
ib_uint64_t modify_clock= 0;
@@ -432,7 +431,8 @@ btr_pcur_open(
}
/** Open a cursor on the first user record satisfying the search condition;
-in case of no match, after the last index record. */
+in case of no match, after the last index record.
+@return DB_SUCCESS or error code */
MY_ATTRIBUTE((nonnull, warn_unused_result))
inline
dberr_t
diff --git a/storage/innobase/include/buf0block_hint.h b/storage/innobase/include/buf0block_hint.h
deleted file mode 100644
index d4fee7c1..00000000
--- a/storage/innobase/include/buf0block_hint.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2020, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2020, MariaDB Corporation.
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License, version 2.0, as published by the
-Free Software Foundation.
-
-This program is also distributed with certain software (including but not
-limited to OpenSSL) that is licensed under separate terms, as designated in a
-particular file or component or in included license documentation. The authors
-of MySQL hereby grant you an additional permission to link the program and
-your derivative works with the separately licensed software that they have
-included with MySQL.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
-for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-#pragma once
-#include "buf0buf.h"
-
-namespace buf {
-class Block_hint {
-public:
- /** Stores the pointer to the block, which is currently buffer-fixed.
- @param block a pointer to a buffer-fixed block to be stored */
- inline void store(buf_block_t *block)
- {
- ut_ad(block->page.buf_fix_count());
- m_block= block;
- m_page_id= block->page.id();
- }
-
- /** Clears currently stored pointer. */
- inline void clear() { m_block= nullptr; }
-
- /** Invoke f on m_block(which may be null)
- @param f The function to be executed. It will be passed the pointer.
- If you wish to use the block pointer subsequently,
- you need to ensure you buffer-fix it before returning from f.
- @return the return value of f
- */
- template <typename F>
- bool run_with_hint(const F &f)
- {
- buffer_fix_block_if_still_valid();
- /* m_block could be changed during f() call, so we use local
- variable to remember which block we need to unfix */
- buf_block_t *block= m_block;
- bool res= f(block);
- if (block)
- block->page.unfix();
- return res;
- }
-
- buf_block_t *block() const { return m_block; }
-
- private:
- /** The block pointer stored by store(). */
- buf_block_t *m_block= nullptr;
- /** If m_block is non-null, the m_block->page.id at time it was stored. */
- page_id_t m_page_id{0, 0};
-
- /** A helper function which checks if m_block is not a dangling pointer and
- still points to block with page with m_page_id and if so, buffer-fixes it,
- otherwise clear()s it */
- void buffer_fix_block_if_still_valid();
-};
-} // namespace buf
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index cd7cc294..c291615c 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -158,14 +158,25 @@ buf_block_free(
#define buf_page_get(ID, SIZE, LA, MTR) \
buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR)
-/** Try to acquire a page latch.
-@param rw_latch RW_S_LATCH or RW_X_LATCH
+/** Try to buffer-fix a page.
@param block guessed block
+@param id expected block->page.id()
+@return block if it was buffer-fixed
+@retval nullptr if the block no longer is valid */
+buf_block_t *buf_page_optimistic_fix(buf_block_t *block, page_id_t id)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Try to acquire a page latch after buf_page_optimistic_fix().
+@param block buffer-fixed block
+@param rw_latch RW_S_LATCH or RW_X_LATCH
@param modify_clock expected value of block->modify_clock
@param mtr mini-transaction
-@return whether the latch was acquired (the page is an allocated file page) */
-bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
- uint64_t modify_clock, mtr_t *mtr);
+@return block if the latch was acquired
+@retval nullptr if block->unfix() was called because it no longer is valid */
+buf_block_t *buf_page_optimistic_get(buf_block_t *block,
+ rw_lock_type_t rw_latch,
+ uint64_t modify_clock, mtr_t *mtr)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Try to S-latch a page.
Suitable for using when holding the lock_sys latches (as it avoids deadlock).
@@ -292,15 +303,6 @@ void
buf_block_modify_clock_inc(
/*=======================*/
buf_block_t* block); /*!< in: block */
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block.
-@return value */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
- buf_block_t* block); /*!< in: block */
#endif /* !UNIV_INNOCHECKSUM */
/** Check if a buffer is all zeroes.
@@ -771,17 +773,16 @@ public:
@retval DB_FAIL if the page contains the wrong ID */
dberr_t read_complete(const fil_node_t &node);
- /** Note that a block is no longer dirty, while not removing
- it from buf_pool.flush_list
- @param temporary whether the page belongs to the temporary tablespace
- @param error whether an error may have occurred while writing */
- inline void write_complete(bool temporary, bool error);
+ /** Release a write fix after a page write was completed.
+ @param persistent whether the page belongs to a persistent tablespace
+ @param error whether an error may have occurred while writing
+ @param state recently read state() value with the correct io-fix */
+ void write_complete(bool persistent, bool error, uint32_t state);
/** Write a flushable page to a file or free a freeable block.
- @param evict whether to evict the page on write completion
@param space tablespace
@return whether a page write was initiated and buf_pool.mutex released */
- bool flush(bool evict, fil_space_t *space);
+ bool flush(fil_space_t *space);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
@@ -1756,10 +1757,6 @@ public:
/** Decrement the number of pending LRU flush */
inline void n_flush_dec();
- /** Decrement the number of pending LRU flush
- while holding flush_list_mutex */
- inline void n_flush_dec_holding_mutex();
-
/** @return whether flush_list flushing is active */
bool flush_list_active() const
{
@@ -1912,6 +1909,9 @@ public:
/** Free a page whose underlying file page has been freed. */
ATTRIBUTE_COLD void release_freed_page(buf_page_t *bpage) noexcept;
+ /** Issue a warning that we could not free up buffer pool pages. */
+ ATTRIBUTE_COLD void LRU_warn();
+
private:
/** Temporary memory for page_compressed and encrypted I/O */
struct io_buf_t
diff --git a/storage/innobase/include/buf0buf.inl b/storage/innobase/include/buf0buf.inl
index b3158cf1..050c8493 100644
--- a/storage/innobase/include/buf0buf.inl
+++ b/storage/innobase/include/buf0buf.inl
@@ -116,17 +116,3 @@ buf_block_modify_clock_inc(
block->modify_clock++;
}
-
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block.
-@return value */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
- buf_block_t* block) /*!< in: block */
-{
- ut_ad(block->page.lock.have_any());
- return(block->modify_clock);
-}
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 0cce514b..cc32a38a 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -85,16 +85,6 @@ buf_flush_init_for_writing(
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr)
MY_ATTRIBUTE((warn_unused_result));
-/** Write out dirty blocks from buf_pool.LRU,
-and move clean blocks to buf_pool.free.
-The caller must invoke buf_dblwr.flush_buffered_writes()
-after releasing buf_pool.mutex.
-@param max_n wished maximum mumber of blocks flushed
-@param evict whether to evict pages after flushing
-@return evict ? number of processed pages : number of pages written
-@retval 0 if a buf_pool.LRU batch is already running */
-ulint buf_flush_LRU(ulint max_n, bool evict);
-
/** Wait until a LRU flush batch ends. */
void buf_flush_wait_LRU_batch_end();
/** Wait until all persistent pages are flushed up to a limit.
diff --git a/storage/innobase/include/cache.h b/storage/innobase/include/cache.h
new file mode 100644
index 00000000..0647cbe6
--- /dev/null
+++ b/storage/innobase/include/cache.h
@@ -0,0 +1,33 @@
+/*****************************************************************************
+
+Copyright (c) 2024, MariaDB plc
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+#pragma once
+#include <cstddef>
+
+#if defined __x86_64__ || defined __aarch64__ || defined __powerpc64__
+struct pmem_control
+{
+ void (*persist)(const void *, size_t);
+public:
+ pmem_control();
+};
+extern const pmem_control pmem;
+# define pmem_persist(buf, size) pmem.persist(buf, size)
+#else
+void pmem_persist(const void *buf, size_t size);
+#endif
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index a5356e0d..fcb543eb 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -339,15 +339,12 @@ dtuple_set_types_binary(
dtuple_t* tuple, /*!< in: data tuple */
ulint n) /*!< in: number of fields to set */
MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
+/** Checks if a dtuple contains an SQL null value.
+@param tuple tuple
+@param fields_number number of fields in the tuple to check
+@return true if some field is SQL null */
UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- const dtuple_t* tuple) /*!< in: dtuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
+bool dtuple_contains_null(const dtuple_t *tuple, ulint fields_number = 0);
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
@return TRUE if ok */
diff --git a/storage/innobase/include/data0data.inl b/storage/innobase/include/data0data.inl
index 2d1bf5a2..b6c6ace8 100644
--- a/storage/innobase/include/data0data.inl
+++ b/storage/innobase/include/data0data.inl
@@ -596,28 +596,18 @@ data_write_sql_null(
memset(data, 0, len);
}
-/**********************************************************************//**
-Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
+/** Checks if a dtuple contains an SQL null value.
+@param tuple tuple
+@param fields_number number of fields in the tuple to check
+@return true if some field is SQL null */
UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- const dtuple_t* tuple) /*!< in: dtuple */
+bool dtuple_contains_null(const dtuple_t *tuple, ulint fields_number)
{
- ulint n;
- ulint i;
-
- n = dtuple_get_n_fields(tuple);
-
- for (i = 0; i < n; i++) {
- if (dfield_is_null(dtuple_get_nth_field(tuple, i))) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
+ ulint n= fields_number ? fields_number : dtuple_get_n_fields(tuple);
+ for (ulint i= 0; i < n; i++)
+ if (dfield_is_null(dtuple_get_nth_field(tuple, i)))
+ return true;
+ return false;
}
/**************************************************************//**
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 64182aab..960ec390 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -32,23 +32,25 @@ Created 5/24/1996 Heikki Tuuri
enum dberr_t {
DB_SUCCESS,
- DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new
+ DB_SUCCESS_LOCKED_REC= 9, /*!< like DB_SUCCESS, but a new
explicit record lock was created */
/* The following are error codes */
- DB_ERROR = 11,
+ DB_RECORD_CHANGED,
+ DB_ERROR,
DB_INTERRUPTED,
DB_OUT_OF_MEMORY,
DB_OUT_OF_FILE_SPACE,
DB_LOCK_WAIT,
DB_DEADLOCK,
- DB_ROLLBACK,
DB_DUPLICATE_KEY,
DB_MISSING_HISTORY, /*!< required history data has been
deleted due to lack of space in
rollback segment */
- DB_CLUSTER_NOT_FOUND = 30,
- DB_TABLE_NOT_FOUND,
+#ifdef WITH_WSREP
+ DB_ROLLBACK,
+#endif
+ DB_TABLE_NOT_FOUND= 31,
DB_TOO_BIG_RECORD, /*!< a record in an index would not fit
on a compressed page, or it would
become bigger than 1/2 free space in
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 5fafb2c5..3baac658 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -35,6 +35,7 @@ Created 1/8/1996 Heikki Tuuri
#include <my_sys.h>
#include <deque>
+class MDL_context;
class MDL_ticket;
/** the first table or index ID for other than hard-coded system tables */
@@ -139,6 +140,21 @@ dict_acquire_mdl_shared(dict_table_t *table,
MDL_ticket **mdl,
dict_table_op_t table_op= DICT_TABLE_OP_NORMAL);
+/** Acquire MDL shared for the table name.
+@tparam trylock whether to use non-blocking operation
+@param[in,out] table table object
+@param[in,out] mdl_context MDL context
+@param[out] mdl MDL ticket
+@param[in] table_op operation to perform when opening
+@return table object after locking MDL shared
+@retval nullptr if the table is not readable, or if trylock && MDL blocked */
+template<bool trylock>
+__attribute__((nonnull, warn_unused_result))
+dict_table_t*
+dict_acquire_mdl_shared(dict_table_t *table,
+ MDL_context *mdl_context, MDL_ticket **mdl,
+ dict_table_op_t table_op);
+
/** Look up a table by numeric identifier.
@param[in] table_id table identifier
@param[in] dict_locked data dictionary locked
@@ -1314,13 +1330,7 @@ class dict_sys_t
std::atomic<ulonglong> latch_ex_wait_start;
/** the rw-latch protecting the data dictionary cache */
- alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_lock latch;
-#ifdef UNIV_DEBUG
- /** whether latch is being held in exclusive mode (by any thread) */
- Atomic_relaxed<pthread_t> latch_ex;
- /** number of S-latch holders */
- Atomic_counter<uint32_t> latch_readers;
-#endif
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) IF_DBUG(srw_lock_debug,srw_lock) latch;
public:
/** Indexes of SYS_TABLE[] */
enum
@@ -1488,15 +1498,12 @@ public:
}
#ifdef UNIV_DEBUG
- /** @return whether any thread (not necessarily the current thread)
- is holding the latch; that is, this check may return false
- positives */
- bool frozen() const { return latch_readers || latch_ex; }
- /** @return whether any thread (not necessarily the current thread)
- is holding a shared latch */
- bool frozen_not_locked() const { return latch_readers; }
+ /** @return whether the current thread is holding the latch */
+ bool frozen() const { return latch.have_any(); }
+ /** @return whether the current thread is holding a shared latch */
+ bool frozen_not_locked() const { return latch.have_rd(); }
/** @return whether the current thread holds the exclusive latch */
- bool locked() const { return latch_ex == pthread_self(); }
+ bool locked() const { return latch.have_wr(); }
#endif
private:
/** Acquire the exclusive latch */
@@ -1511,13 +1518,7 @@ public:
/** Exclusively lock the dictionary cache. */
void lock(SRW_LOCK_ARGS(const char *file, unsigned line))
{
- if (latch.wr_lock_try())
- {
- ut_ad(!latch_readers);
- ut_ad(!latch_ex);
- ut_d(latch_ex= pthread_self());
- }
- else
+ if (!latch.wr_lock_try())
lock_wait(SRW_LOCK_ARGS(file, line));
}
@@ -1530,27 +1531,11 @@ public:
ATTRIBUTE_NOINLINE void unfreeze();
#else
/** Unlock the data dictionary cache. */
- void unlock()
- {
- ut_ad(latch_ex == pthread_self());
- ut_ad(!latch_readers);
- ut_d(latch_ex= 0);
- latch.wr_unlock();
- }
+ void unlock() { latch.wr_unlock(); }
/** Acquire a shared lock on the dictionary cache. */
- void freeze()
- {
- latch.rd_lock();
- ut_ad(!latch_ex);
- ut_d(latch_readers++);
- }
+ void freeze() { latch.rd_lock(); }
/** Release a shared lock on the dictionary cache. */
- void unfreeze()
- {
- ut_ad(!latch_ex);
- ut_ad(latch_readers--);
- latch.rd_unlock();
- }
+ void unfreeze() { latch.rd_unlock(); }
#endif
/** Estimate the used memory occupied by the data dictionary
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index fde2a714..0268a280 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1010,8 +1010,6 @@ struct dict_index_t {
/*!< number of columns the user defined to
be in the index: in the internal
representation we add more columns */
- unsigned nulls_equal:1;
- /*!< if true, SQL NULL == SQL NULL */
unsigned n_uniq:10;/*!< number of fields from the beginning
which are enough to determine an index
entry uniquely */
@@ -2448,6 +2446,9 @@ public:
/** @return number of unique columns in FTS_DOC_ID index */
unsigned fts_n_uniq() const { return versioned() ? 2 : 1; }
+ /** @return the index that starts with a specific column */
+ dict_index_t *get_index(const dict_col_t &col) const;
+
/** Create metadata.
@param name table name
@param space tablespace
diff --git a/storage/innobase/include/dict0mem.inl b/storage/innobase/include/dict0mem.inl
index d60ee5d9..edb7cf92 100644
--- a/storage/innobase/include/dict0mem.inl
+++ b/storage/innobase/include/dict0mem.inl
@@ -63,6 +63,5 @@ dict_mem_fill_index_struct(
& index->MAX_N_FIELDS;
/* The '1 +' above prevents allocation
of an empty mem block */
- index->nulls_equal = false;
ut_d(index->magic_n = DICT_INDEX_MAGIC_N);
}
diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h
index 06af4dcc..7a4e6760 100644
--- a/storage/innobase/include/dyn0buf.h
+++ b/storage/innobase/include/dyn0buf.h
@@ -57,11 +57,7 @@ public:
/**
Gets the number of used bytes in a block.
@return number of bytes used */
- ulint used() const
- MY_ATTRIBUTE((warn_unused_result))
- {
- return(static_cast<ulint>(m_used & ~DYN_BLOCK_FULL_FLAG));
- }
+ uint32_t used() const { return m_used; }
/**
Gets pointer to the start of data.
@@ -153,8 +149,7 @@ public:
/** Storage */
byte m_data[MAX_DATA_SIZE];
- /** number of data bytes used in this block;
- DYN_BLOCK_FULL_FLAG is set when the block becomes full */
+ /** number of data bytes used in this block */
uint32_t m_used;
friend class mtr_buf_t;
diff --git a/storage/innobase/include/dyn0types.h b/storage/innobase/include/dyn0types.h
index 83d0b0d6..af7f663d 100644
--- a/storage/innobase/include/dyn0types.h
+++ b/storage/innobase/include/dyn0types.h
@@ -33,7 +33,4 @@ Created 2013-03-16 Sunny Bains
/** This is the initial 'payload' size of a dynamic array */
#define DYN_ARRAY_DATA_SIZE 512
-/** Flag for dyn_block_t::used that indicates a full block */
-#define DYN_BLOCK_FULL_FLAG 0x1000000UL
-
#endif /* dyn0types_h */
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index cdc32515..dfda1178 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -63,7 +63,7 @@ enum srv_flush_t
SRV_LITTLESYNC,
/** do not flush after writing */
SRV_NOSYNC,
- /** invoke os_file_set_nocache() on data files. This implies using
+ /** Open or create files with O_DIRECT. This implies using
unbuffered I/O but still fdatasync(), because some filesystems might
not flush meta-data on write completion */
SRV_O_DIRECT,
@@ -347,7 +347,6 @@ struct fil_space_t final
~fil_space_t()
{
ut_ad(!latch_owner);
- ut_ad(!latch_count);
latch.destroy();
}
@@ -411,9 +410,9 @@ private:
/** The reference count */
static constexpr uint32_t PENDING= ~(STOPPING | CLOSING | NEEDS_FSYNC);
/** latch protecting all page allocation bitmap pages */
- srw_lock latch;
+ IF_DBUG(srw_lock_debug, srw_lock) latch;
+ /** the thread that holds the exclusive latch, or 0 */
pthread_t latch_owner;
- ut_d(Atomic_relaxed<uint32_t> latch_count;)
public:
/** MariaDB encryption data */
fil_space_crypt_t *crypt_data;
@@ -1004,40 +1003,32 @@ public:
bool recheck, bool encrypt);
#ifdef UNIV_DEBUG
- bool is_latched() const { return latch_count != 0; }
+ bool is_latched() const { return latch.have_any(); }
#endif
- bool is_owner() const { return latch_owner == pthread_self(); }
+ bool is_owner() const
+ {
+ const bool owner{latch_owner == pthread_self()};
+ ut_ad(owner == latch.have_wr());
+ return owner;
+ }
/** Acquire the allocation latch in exclusive mode */
void x_lock()
{
latch.wr_lock(SRW_LOCK_CALL);
ut_ad(!latch_owner);
latch_owner= pthread_self();
- ut_ad(!latch_count.fetch_add(1));
}
/** Release the allocation latch from exclusive mode */
void x_unlock()
{
- ut_ad(latch_count.fetch_sub(1) == 1);
ut_ad(latch_owner == pthread_self());
latch_owner= 0;
latch.wr_unlock();
}
/** Acquire the allocation latch in shared mode */
- void s_lock()
- {
- ut_ad(!is_owner());
- latch.rd_lock(SRW_LOCK_CALL);
- ut_ad(!latch_owner);
- ut_d(latch_count.fetch_add(1));
- }
+ void s_lock() { latch.rd_lock(SRW_LOCK_CALL); }
/** Release the allocation latch from shared mode */
- void s_unlock()
- {
- ut_ad(latch_count.fetch_sub(1));
- ut_ad(!latch_owner);
- latch.rd_unlock();
- }
+ void s_unlock() { latch.rd_unlock(); }
typedef span<const char> name_type;
@@ -1637,17 +1628,34 @@ void fil_close_tablespace(uint32_t id);
/*******************************************************************//**
Allocates and builds a file name from a path, a table or tablespace name
and a suffix. The string must be freed by caller with ut_free().
-@param[in] path NULL or the directory path or the full path and filename.
+@param[in] path nullptr or the directory path or the full path and filename
@param[in] name {} if path is full, or Table/Tablespace name
-@param[in] ext the file extension to use
-@param[in] trim_name true if the last name on the path should be trimmed.
+@param[in] extension the file extension to use
+@param[in] trim_name true if the last name on the path should be trimmed
@return own: file name */
-char* fil_make_filepath(const char *path, const fil_space_t::name_type &name,
- ib_extention ext, bool trim_name);
+char* fil_make_filepath_low(const char *path,
+ const fil_space_t::name_type &name,
+ ib_extention extension, bool trim_name);
char *fil_make_filepath(const char* path, const table_name_t name,
ib_extention suffix, bool strip_name);
+/** Wrapper function over fil_make_filepath_low to build file name.
+@param path nullptr or the directory path or the full path and filename
+@param name {} if path is full, or Table/Tablespace name
+@param extension the file extension to use
+@param trim_name true if the last name on the path should be trimmed
+@return own: file name */
+static inline char*
+fil_make_filepath(const char* path, const fil_space_t::name_type &name,
+ ib_extention extension, bool trim_name)
+{
+ /* If we are going to strip a name off the path, there better be a
+ path and a new name to put back on. */
+ ut_ad(!trim_name || (path && name.data()));
+ return fil_make_filepath_low(path, name, extension, trim_name);
+}
+
/** Create a tablespace file.
@param[in] space_id Tablespace ID
@param[in] name Tablespace name in dbname/tablename format.
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 26261554..99459bcb 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -209,24 +209,6 @@ typedef byte fseg_inode_t;
static constexpr byte FSEG_MAGIC_N_BYTES[4]={0x05,0xd6,0x69,0xd2};
-#define FSEG_FILLFACTOR 8 /* If the number of unused but reserved
- pages in a segment is less than
- reserved pages / FSEG_FILLFACTOR,
- and there are
- at least FSEG_FRAG_LIMIT used pages,
- then we allow a new empty extent to
- be added to the segment in
- fseg_alloc_free_page_general().
- Otherwise, we
- use unused pages of the segment. */
-
-#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
- /* If the segment has >= this many
- used pages, it may be expanded by
- allocating extents to the segment;
- until that only individual fragment
- pages are allocated from the space */
-
#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
is at least this many extents, we
allow extents to be put to the free
@@ -294,7 +276,7 @@ Determine if a page is marked free.
@param[in] descr extent descriptor
@param[in] offset page offset within extent
@return whether the page is free */
-inline bool xdes_is_free(const xdes_t *descr, ulint offset)
+inline bool xdes_is_free(const xdes_t *descr, uint32_t offset)
{
ut_ad(offset < FSP_EXTENT_SIZE);
ulint index= XDES_FREE_BIT + XDES_BITS_PER_PAGE * offset;
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index c0151b44..1d2b409b 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -163,6 +163,9 @@ struct fts_token_t;
struct fts_doc_ids_t;
struct fts_index_cache_t;
+/** Compare two DOC_ID. */
+int fts_doc_id_cmp(const void *p1, const void *p2)
+ __attribute__((nonnull, warn_unused_result));
/** Initialize the "fts_table" for internal query into FTS auxiliary
tables */
@@ -412,6 +415,9 @@ inline void fts_doc_ids_free(fts_doc_ids_t* doc_ids)
mem_heap_free(static_cast<mem_heap_t*>(doc_ids->self_heap->arg));
}
+/** Sort an array of doc_id */
+void fts_doc_ids_sort(ib_vector_t *doc_ids);
+
/******************************************************************//**
Notify the FTS system about an operation on an FTS-indexed table. */
void
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index ae0bb036..04faceb9 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -271,27 +271,6 @@ fts_index_fetch_nodes(
word, /*!< in: the word to fetch */
fts_fetch_t* fetch) /*!< in: fetch callback.*/
MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Compare two fts_trx_table_t instances, we actually compare the
-table id's here.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_cmp(
-/*==============*/
- const void* v1, /*!< in: id1 */
- const void* v2) /*!< in: id2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Compare a table id with a trx_table_t table id.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_id_cmp(
-/*=================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
#define fts_sql_commit(trx) trx_commit_for_mysql(trx)
#define fts_sql_rollback(trx) (trx)->rollback()
/******************************************************************//**
diff --git a/storage/innobase/include/fts0priv.inl b/storage/innobase/include/fts0priv.inl
index 3cb09c92..3d937bb3 100644
--- a/storage/innobase/include/fts0priv.inl
+++ b/storage/innobase/include/fts0priv.inl
@@ -52,47 +52,3 @@ fts_read_object_id(
if the id is HEX or DEC and do the right thing with it. */
return(sscanf(str, UINT64PFx, id) == 1);
}
-
-/******************************************************************//**
-Compare two fts_trx_table_t instances.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_cmp(
-/*==============*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const dict_table_t* table1
- = (*static_cast<const fts_trx_table_t* const*>(p1))->table;
-
- const dict_table_t* table2
- = (*static_cast<const fts_trx_table_t* const*>(p2))->table;
-
- return((table1->id > table2->id)
- ? 1
- : (table1->id == table2->id)
- ? 0
- : -1);
-}
-
-/******************************************************************//**
-Compare a table id with a fts_trx_table_t table id.
-@return < 0 if n1 < n2, 0 if n1 == n2,> 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_id_cmp(
-/*=================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const uintmax_t* table_id = static_cast<const uintmax_t*>(p1);
- const dict_table_t* table2
- = (*static_cast<const fts_trx_table_t* const*>(p2))->table;
-
- return((*table_id > table2->id)
- ? 1
- : (*table_id == table2->id)
- ? 0
- : -1);
-}
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index fb278d54..7b95348b 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -278,44 +278,6 @@ struct fts_token_t {
extern const fts_index_selector_t fts_index_selector[];
/******************************************************************//**
-Compare two fts_trx_row_t instances doc_ids. */
-UNIV_INLINE
-int
-fts_trx_row_doc_id_cmp(
-/*===================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
-
-/******************************************************************//**
-Compare two fts_ranking_t instances doc_ids. */
-UNIV_INLINE
-int
-fts_ranking_doc_id_cmp(
-/*===================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
-
-/******************************************************************//**
-Compare two doc_ids. */
-UNIV_INLINE
-int fts_doc_id_cmp(
-/*==================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
-
-/******************************************************************//**
Duplicate a string. */
UNIV_INLINE
void
diff --git a/storage/innobase/include/fts0types.inl b/storage/innobase/include/fts0types.inl
index facc1e5c..5b57cad7 100644
--- a/storage/innobase/include/fts0types.inl
+++ b/storage/innobase/include/fts0types.inl
@@ -47,53 +47,6 @@ fts_string_dup(
}
/******************************************************************//**
-Compare two fts_trx_row_t doc_ids.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_row_doc_id_cmp(
-/*===================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1;
- const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2;
-
- return((int)(tr1->doc_id - tr2->doc_id));
-}
-
-/******************************************************************//**
-Compare two fts_ranking_t doc_ids.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_ranking_doc_id_cmp(
-/*===================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const fts_ranking_t* rk1 = (const fts_ranking_t*) p1;
- const fts_ranking_t* rk2 = (const fts_ranking_t*) p2;
-
- return((int)(rk1->doc_id - rk2->doc_id));
-}
-
-/******************************************************************//**
-Compare two doc_ids.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int fts_doc_id_cmp(
-/*==================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const doc_id_t* up1 = static_cast<const doc_id_t*>(p1);
- const doc_id_t* up2 = static_cast<const doc_id_t*>(p2);
-
- return static_cast<int>(*up1 - *up2);
-}
-
-/******************************************************************//**
Get the first character's code position for FTS index partition */
extern
ulint
diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h
index 746dab80..1adec365 100644
--- a/storage/innobase/include/fut0lst.h
+++ b/storage/innobase/include/fut0lst.h
@@ -78,34 +78,40 @@ void flst_init(const buf_block_t &block, byte *base, mtr_t *mtr)
MY_ATTRIBUTE((nonnull));
/** Append a file list node to a list.
-@param[in,out] base base node block
-@param[in] boffset byte offset of the base node
-@param[in,out] add block to be added
-@param[in] aoffset byte offset of the node to be added
-@param[in,out] mtr mini-transaction
+@param base base node block
+@param boffset byte offset of the base node
+@param add block to be added
+@param aoffset byte offset of the node to be added
+@param limit fil_space_t::free_limit
+@param mtr mini-transaction
@return error code */
dberr_t flst_add_last(buf_block_t *base, uint16_t boffset,
- buf_block_t *add, uint16_t aoffset, mtr_t *mtr)
+ buf_block_t *add, uint16_t aoffset,
+ uint32_t limit, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Prepend a file list node to a list.
-@param[in,out] base base node block
-@param[in] boffset byte offset of the base node
-@param[in,out] add block to be added
-@param[in] aoffset byte offset of the node to be added
-@param[in,out] mtr mini-transaction
+@param base base node block
+@param boffset byte offset of the base node
+@param add block to be added
+@param aoffset byte offset of the node to be added
+@param limit fil_space_t::free_limit
+@param mtr mini-transaction
@return error code */
dberr_t flst_add_first(buf_block_t *base, uint16_t boffset,
- buf_block_t *add, uint16_t aoffset, mtr_t *mtr)
+ buf_block_t *add, uint16_t aoffset,
+ uint32_t limit, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Remove a file list node.
-@param[in,out] base base node block
-@param[in] boffset byte offset of the base node
-@param[in,out] cur block to be removed
-@param[in] coffset byte offset of the current record to be removed
-@param[in,out] mtr mini-transaction
+@param base base node block
+@param boffset byte offset of the base node
+@param cur block to be removed
+@param coffset byte offset of the current record to be removed
+@param limit fil_space_t::free_limit
+@param mtr mini-transaction
@return error code */
dberr_t flst_remove(buf_block_t *base, uint16_t boffset,
- buf_block_t *cur, uint16_t coffset, mtr_t *mtr)
+ buf_block_t *cur, uint16_t coffset,
+ uint32_t limit, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** @return the length of a list */
@@ -117,11 +123,9 @@ inline uint32_t flst_get_len(const flst_base_node_t *base)
/** @return a file address */
inline fil_addr_t flst_read_addr(const byte *faddr)
{
- fil_addr_t addr= { mach_read_from_4(faddr + FIL_ADDR_PAGE),
- mach_read_from_2(faddr + FIL_ADDR_BYTE) };
- ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
- return addr;
+ ut_ad(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
+ return fil_addr_t{mach_read_from_4(faddr + FIL_ADDR_PAGE),
+ mach_read_from_2(faddr + FIL_ADDR_BYTE)};
}
/** @return list first node address */
diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h
index d6a4ef67..2dc25a89 100644
--- a/storage/innobase/include/gis0type.h
+++ b/storage/innobase/include/gis0type.h
@@ -66,10 +66,7 @@ typedef std::vector<rtr_rec_t, ut_allocator<rtr_rec_t> > rtr_rec_vector;
/* Structure for matched records on the leaf page */
typedef struct matched_rec {
- byte* bufp; /*!< aligned buffer point */
- byte rec_buf[UNIV_PAGE_SIZE_MAX * 2];
- /*!< buffer used to copy matching rec */
- buf_block_t block; /*!< the shadow buffer block */
+ buf_block_t* block; /*!< the shadow buffer block */
ulint used; /*!< memory used */
rtr_rec_vector* matched_recs; /*!< vector holding the matching rec */
mysql_mutex_t rtr_match_mutex;/*!< mutex protect the match_recs
@@ -107,7 +104,6 @@ typedef struct rtr_info{
/*!< mutex protect the "path" vector */
rtr_mbr_t mbr; /*!< the search MBR */
que_thr_t* thr; /*!< the search thread */
- mem_heap_t* heap; /*!< memory heap */
btr_cur_t* cursor; /*!< cursor used for search */
dict_index_t* index; /*!< index it is searching */
bool need_prdt_lock;
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 59ee7f55..08b9f4bc 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -438,6 +438,13 @@ dberr_t lock_table_for_trx(dict_table_t *table, trx_t *trx, lock_mode mode,
bool no_wait= false)
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Lock the child tables of a table.
+@param table parent table
+@param trx transaction
+@return error code */
+dberr_t lock_table_children(dict_table_t *table, trx_t *trx)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/** Exclusively lock the data dictionary tables.
@param trx dictionary transaction
@return error code
@@ -724,13 +731,8 @@ private:
bool m_initialised;
/** mutex protecting the locks */
- alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock latch;
-#ifdef UNIV_DEBUG
- /** The owner of exclusive latch (0 if none); protected by latch */
- std::atomic<pthread_t> writer{0};
- /** Number of shared latches */
- std::atomic<ulint> readers{0};
-#endif
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE)
+ IF_DBUG(srw_lock_debug,srw_spin_lock) latch;
#ifdef SUX_LOCK_GENERIC
protected:
/** mutex for hash_latch::wait() */
@@ -789,71 +791,35 @@ public:
void wr_lock()
{
mysql_mutex_assert_not_owner(&wait_mutex);
- ut_ad(!is_writer());
latch.wr_lock();
- ut_ad(!writer.exchange(pthread_self(),
- std::memory_order_relaxed));
}
/** Release exclusive lock_sys.latch */
- void wr_unlock()
- {
- ut_ad(writer.exchange(0, std::memory_order_relaxed) ==
- pthread_self());
- latch.wr_unlock();
- }
+ void wr_unlock() { latch.wr_unlock(); }
/** Acquire shared lock_sys.latch */
void rd_lock()
{
mysql_mutex_assert_not_owner(&wait_mutex);
- ut_ad(!is_writer());
latch.rd_lock();
- ut_ad(!writer.load(std::memory_order_relaxed));
- ut_d(readers.fetch_add(1, std::memory_order_relaxed));
}
/** Release shared lock_sys.latch */
- void rd_unlock()
- {
- ut_ad(!is_writer());
- ut_ad(readers.fetch_sub(1, std::memory_order_relaxed));
- latch.rd_unlock();
- }
+ void rd_unlock() { latch.rd_unlock(); }
#endif
/** Try to acquire exclusive lock_sys.latch
@return whether the latch was acquired */
- bool wr_lock_try()
- {
- ut_ad(!is_writer());
- if (!latch.wr_lock_try()) return false;
- ut_ad(!writer.exchange(pthread_self(),
- std::memory_order_relaxed));
- return true;
- }
+ bool wr_lock_try() { return latch.wr_lock_try(); }
/** Try to acquire shared lock_sys.latch
@return whether the latch was acquired */
- bool rd_lock_try()
- {
- ut_ad(!is_writer());
- if (!latch.rd_lock_try()) return false;
- ut_ad(!writer.load(std::memory_order_relaxed));
- ut_d(readers.fetch_add(1, std::memory_order_relaxed));
- return true;
- }
+ bool rd_lock_try() { return latch.rd_lock_try(); }
/** Assert that wr_lock() has been invoked by this thread */
- void assert_locked() const { ut_ad(is_writer()); }
+ void assert_locked() const { ut_ad(latch.have_wr()); }
/** Assert that wr_lock() has not been invoked by this thread */
- void assert_unlocked() const { ut_ad(!is_writer()); }
+ void assert_unlocked() const { ut_ad(!latch.have_wr()); }
#ifdef UNIV_DEBUG
/** @return whether the current thread is the lock_sys.latch writer */
- bool is_writer() const
- {
-# ifdef SUX_LOCK_GENERIC
- return writer.load(std::memory_order_relaxed) == pthread_self();
-# else
- return writer.load(std::memory_order_relaxed) == pthread_self() ||
- (xtest() && !latch.is_locked_or_waiting());
-# endif
- }
+ bool is_writer() const { return latch.have_wr(); }
+ /** @return whether the current thread is holding lock_sys.latch */
+ bool is_holder() const { return latch.have_any(); }
/** Assert that a lock shard is exclusively latched (by some thread) */
void assert_locked(const lock_t &lock) const;
/** Assert that a table lock shard is exclusively latched by this thread */
@@ -965,14 +931,14 @@ extern lock_sys_t lock_sys;
/** @return the index of an array element */
inline ulint lock_sys_t::hash_table::calc_hash(ulint fold) const
{
- ut_ad(lock_sys.is_writer() || lock_sys.readers);
+ ut_ad(lock_sys.is_holder());
return calc_hash(fold, n_cells);
}
/** Get a hash table cell. */
inline hash_cell_t *lock_sys_t::hash_table::cell_get(ulint fold) const
{
- ut_ad(lock_sys.is_writer() || lock_sys.readers);
+ ut_ad(lock_sys.is_holder());
return &array[calc_hash(fold)];
}
diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h
index 22c0c963..2500ac05 100644
--- a/storage/innobase/include/log0crypt.h
+++ b/storage/innobase/include/log0crypt.h
@@ -28,6 +28,9 @@ MDEV-11782: Rewritten for MariaDB 10.2 by Marko Mäkelä, MariaDB Corporation.
#include "log0log.h"
+/** innodb_encrypt_log: whether to encrypt the redo log */
+extern my_bool srv_encrypt_log;
+
/** Initialize the redo log encryption key and random parameters
when creating a new redo log.
The random parameters will be persisted in the log header.
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 54851ca0..cef0dcae 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -132,6 +132,9 @@ public:
/** Redo log buffer */
struct log_t
{
+ /** The maximum buf_size */
+ static constexpr unsigned buf_size_max= os_file_request_size_max;
+
/** The original (not version-tagged) InnoDB redo log format */
static constexpr uint32_t FORMAT_3_23= 0;
/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
@@ -165,60 +168,92 @@ struct log_t
static constexpr lsn_t FIRST_LSN= START_OFFSET;
private:
- /** The log sequence number of the last change of durable InnoDB files */
+ /** the lock bit in buf_free */
+ static constexpr size_t buf_free_LOCK= ~(~size_t{0} >> 1);
alignas(CPU_LEVEL1_DCACHE_LINESIZE)
+ /** offset of the first free byte within buf;
+ the most significant bit is set by lock_lsn() to protect this field
+ as well as write_to_buf, waits */
+ std::atomic<size_t> buf_free;
+public:
+ /** number of write requests (to buf); protected by lock_lsn() or lsn_lock */
+ size_t write_to_buf;
+ /** log record buffer, written to by mtr_t::commit() */
+ byte *buf;
+private:
+ /** The log sequence number of the last change of durable InnoDB files;
+ protected by lock_lsn() or lsn_lock or latch.wr_lock() */
std::atomic<lsn_t> lsn;
/** the first guaranteed-durable log sequence number */
std::atomic<lsn_t> flushed_to_disk_lsn;
- /** log sequence number when log resizing was initiated, or 0 */
- std::atomic<lsn_t> resize_lsn;
- /** set when there may be need to initiate a log checkpoint.
- This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
- std::atomic<bool> need_checkpoint;
+public:
+ /** number of append_prepare_wait(); protected by lock_lsn() or lsn_lock */
+ size_t waits;
+ /** innodb_log_buffer_size (size of buf,flush_buf if !is_pmem(), in bytes) */
+ unsigned buf_size;
+ /** log file size in bytes, including the header */
+ lsn_t file_size;
+
+#ifdef LOG_LATCH_DEBUG
+ typedef srw_lock_debug log_rwlock;
+ typedef srw_mutex log_lsn_lock;
-#if defined(__aarch64__)
- /* On ARM, we do more spinning */
+ bool latch_have_wr() const { return latch.have_wr(); }
+ bool latch_have_rd() const { return latch.have_rd(); }
+ bool latch_have_any() const { return latch.have_any(); }
+#else
+# ifndef UNIV_DEBUG
+# elif defined SUX_LOCK_GENERIC
+ bool latch_have_wr() const { return true; }
+ bool latch_have_rd() const { return true; }
+ bool latch_have_any() const { return true; }
+# else
+ bool latch_have_wr() const { return latch.is_write_locked(); }
+ bool latch_have_rd() const { return latch.is_locked(); }
+ bool latch_have_any() const { return latch.is_locked(); }
+# endif
+# ifdef __aarch64__
+ /* On ARM, we spin more */
typedef srw_spin_lock log_rwlock;
typedef pthread_mutex_wrapper<true> log_lsn_lock;
-#else
+# else
typedef srw_lock log_rwlock;
typedef srw_mutex log_lsn_lock;
+# endif
#endif
-
-public:
- /** rw-lock protecting writes to buf; normal mtr_t::commit()
- outside any log checkpoint is covered by a shared latch */
+ /** exclusive latch for checkpoint, shared for mtr_t::commit() to buf */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock latch;
-private:
- /** mutex protecting buf_free et al, together with latch */
- log_lsn_lock lsn_lock;
-public:
- /** first free offset within buf use; protected by lsn_lock */
- Atomic_relaxed<size_t> buf_free;
- /** number of write requests (to buf); protected by lsn_lock */
- size_t write_to_buf;
- /** number of append_prepare_wait(); protected by lsn_lock */
- size_t waits;
-private:
+
+ /** number of std::swap(buf, flush_buf) and writes from buf to log;
+ protected by latch.wr_lock() */
+ ulint write_to_log;
+
/** Last written LSN */
lsn_t write_lsn;
-public:
- /** log record buffer, written to by mtr_t::commit() */
- byte *buf;
+
/** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
In write_buf(), buf and flush_buf are swapped */
byte *flush_buf;
- /** number of std::swap(buf, flush_buf) and writes from buf to log;
- protected by latch.wr_lock() */
- ulint write_to_log;
-
+ /** set when there may be need to initiate a log checkpoint.
+ This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
+ std::atomic<bool> need_checkpoint;
+ /** whether a checkpoint is pending; protected by latch.wr_lock() */
+ Atomic_relaxed<bool> checkpoint_pending;
+ /** next checkpoint number (protected by latch.wr_lock()) */
+ byte next_checkpoint_no;
+ /** recommended maximum buf_free size, after which the buffer is flushed */
+ unsigned max_buf_free;
/** Log sequence number when a log file overwrite (broken crash recovery)
was noticed. Protected by latch.wr_lock(). */
lsn_t overwrite_warned;
- /** innodb_log_buffer_size (size of buf,flush_buf if !is_pmem(), in bytes) */
- size_t buf_size;
+ /** latest completed checkpoint (protected by latch.wr_lock()) */
+ Atomic_relaxed<lsn_t> last_checkpoint_lsn;
+ /** next checkpoint LSN (protected by latch.wr_lock()) */
+ lsn_t next_checkpoint_lsn;
+ /** Log file */
+ log_file_t log;
private:
/** Log file being constructed during resizing; protected by latch */
log_file_t resize_log;
@@ -229,18 +264,14 @@ private:
/** Buffer for writing to resize_log; @see flush_buf */
byte *resize_flush_buf;
- void init_lsn_lock() {lsn_lock.init(); }
- void lock_lsn() { lsn_lock.wr_lock(); }
- void unlock_lsn() {lsn_lock.wr_unlock(); }
- void destroy_lsn_lock() { lsn_lock.destroy(); }
-
-public:
- /** recommended maximum size of buf, after which the buffer is flushed */
- size_t max_buf_free;
+ /** Special implementation of lock_lsn() for IA-32 and AMD64 */
+ void lsn_lock_bts() noexcept;
+ /** Acquire a lock for updating buf_free and related fields.
+ @return the value of buf_free */
+ size_t lock_lsn() noexcept;
- /** log file size in bytes, including the header */
- lsn_t file_size;
-private:
+ /** log sequence number when log resizing was initiated, or 0 */
+ std::atomic<lsn_t> resize_lsn;
/** the log sequence number at the start of the log file */
lsn_t first_lsn;
#if defined __linux__ || defined _WIN32
@@ -250,8 +281,6 @@ private:
public:
/** format of the redo log: e.g., FORMAT_10_8 */
uint32_t format;
- /** Log file */
- log_file_t log;
#if defined __linux__ || defined _WIN32
/** whether file system caching is enabled for the log */
my_bool log_buffered;
@@ -279,21 +308,29 @@ public:
/*!< this is the maximum allowed value
for lsn - last_checkpoint_lsn when a
new query step is started */
- /** latest completed checkpoint (protected by latch.wr_lock()) */
- Atomic_relaxed<lsn_t> last_checkpoint_lsn;
- /** next checkpoint LSN (protected by log_sys.latch) */
- lsn_t next_checkpoint_lsn;
- /** next checkpoint number (protected by latch.wr_lock()) */
- ulint next_checkpoint_no;
- /** whether a checkpoint is pending */
- Atomic_relaxed<bool> checkpoint_pending;
/** buffer for checkpoint header */
byte *checkpoint_buf;
/* @} */
+private:
+ /** A lock that is used when the spin-only lock_lsn() is not being used */
+ log_lsn_lock lsn_lock;
+public:
+
bool is_initialised() const noexcept { return max_buf_free != 0; }
+ /** whether there is capacity in the log buffer */
+ bool buf_free_ok() const noexcept
+ {
+ ut_ad(!is_pmem());
+ return (buf_free.load(std::memory_order_relaxed) & ~buf_free_LOCK) <
+ max_buf_free;
+ }
+
+ void set_buf_free(size_t f) noexcept
+ { ut_ad(f < buf_free_LOCK); buf_free.store(f, std::memory_order_relaxed); }
+
#ifdef HAVE_PMEM
bool is_pmem() const noexcept { return !flush_buf; }
#else
@@ -302,7 +339,7 @@ public:
bool is_opened() const noexcept { return log.is_opened(); }
- /** @return target write LSN to react on buf_free >= max_buf_free */
+ /** @return target write LSN to react on !buf_free_ok() */
inline lsn_t get_write_target() const;
/** @return LSN at which log resizing was started and is still in progress
@@ -402,9 +439,7 @@ public:
void set_recovered_lsn(lsn_t lsn) noexcept
{
-#ifndef SUX_LOCK_GENERIC
- ut_ad(latch.is_write_locked());
-#endif /* SUX_LOCK_GENERIC */
+ ut_ad(latch_have_wr());
write_lsn= lsn;
this->lsn.store(lsn, std::memory_order_relaxed);
flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed);
@@ -444,17 +479,23 @@ public:
private:
/** Wait in append_prepare() for buffer to become available
- @param lsn log sequence number to write up to
- @param ex whether log_sys.latch is exclusively locked */
- ATTRIBUTE_COLD void append_prepare_wait(lsn_t lsn, bool ex) noexcept;
+ @tparam spin whether to use the spin-only lock_lsn()
+ @param b the value of buf_free
+ @param ex whether log_sys.latch is exclusively locked
+ @param lsn log sequence number to write up to
+ @return the new value of buf_free */
+ template<bool spin>
+ ATTRIBUTE_COLD size_t append_prepare_wait(size_t b, bool ex, lsn_t lsn)
+ noexcept;
public:
/** Reserve space in the log buffer for appending data.
+ @tparam spin whether to use the spin-only lock_lsn()
@tparam pmem log_sys.is_pmem()
@param size total length of the data to append(), in bytes
@param ex whether log_sys.latch is exclusively locked
@return the start LSN and the buffer position for append() */
- template<bool pmem>
- inline std::pair<lsn_t,byte*> append_prepare(size_t size, bool ex) noexcept;
+ template<bool spin,bool pmem>
+ std::pair<lsn_t,byte*> append_prepare(size_t size, bool ex) noexcept;
/** Append a string of bytes to the redo log.
@param d destination
@@ -462,9 +503,7 @@ public:
@param size length of str, in bytes */
void append(byte *&d, const void *s, size_t size) noexcept
{
-#ifndef SUX_LOCK_GENERIC
- ut_ad(latch.is_locked());
-#endif
+ ut_ad(latch_have_any());
ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size));
memcpy(d, s, size);
d+= size;
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index c916edc9..bfa66216 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -695,14 +695,40 @@ private:
/** Encrypt the log */
ATTRIBUTE_NOINLINE void encrypt();
+ /** Commit the mini-transaction log.
+ @tparam pmem log_sys.is_pmem()
+ @param mtr mini-transaction
+ @param lsns {start_lsn,flush_ahead} */
+ template<bool pmem>
+ static void commit_log(mtr_t *mtr, std::pair<lsn_t,page_flush_ahead> lsns);
+
/** Append the redo log records to the redo log buffer.
@return {start_lsn,flush_ahead} */
std::pair<lsn_t,page_flush_ahead> do_write();
/** Append the redo log records to the redo log buffer.
+ @tparam spin whether to use the spin-only log_sys.lock_lsn()
+ @tparam pmem log_sys.is_pmem()
+ @param mtr mini-transaction
@param len number of bytes to write
@return {start_lsn,flush_ahead} */
- std::pair<lsn_t,page_flush_ahead> finish_write(size_t len);
+ template<bool spin,bool pmem> static
+ std::pair<lsn_t,page_flush_ahead> finish_writer(mtr_t *mtr, size_t len);
+
+ /** The applicable variant of commit_log() */
+ static void (*commit_logger)(mtr_t *, std::pair<lsn_t,page_flush_ahead>);
+ /** The applicable variant of finish_writer() */
+ static std::pair<lsn_t,page_flush_ahead> (*finisher)(mtr_t *, size_t);
+
+ std::pair<lsn_t,page_flush_ahead> finish_write(size_t len)
+ { return finisher(this, len); }
+public:
+ /** Poll interval in log_sys.lock_lsn(); 0 to use log_sys.lsn_lock.
+ Protected by LOCK_global_system_variables and log_sys.latch. */
+ static unsigned spin_wait_delay;
+ /** Update finisher when spin_wait_delay is changing to or from 0. */
+ static void finisher_update();
+private:
/** Release all latches. */
void release();
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index c8374515..7eba359f 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -46,6 +46,18 @@ Created 10/21/1995 Heikki Tuuri
#include <time.h>
#endif /* !_WIN32 */
+/** The maximum size of a read or write request.
+
+According to Linux "man 2 read" and "man 2 write" this applies to
+both 32-bit and 64-bit systems.
+
+On FreeBSD, the limit is close to the Linux one, INT_MAX.
+
+On Microsoft Windows, the limit is UINT_MAX (4 GiB - 1).
+
+On other systems, the limit typically is up to SSIZE_T_MAX. */
+static constexpr unsigned os_file_request_size_max= 0x7ffff000;
+
extern bool os_has_said_disk_full;
/** File offset in bytes */
@@ -109,25 +121,21 @@ struct pfs_os_file_t
/** Options for os_file_create_func @{ */
enum os_file_create_t {
- OS_FILE_OPEN = 51, /*!< to open an existing file (if
- doesn't exist, error) */
- OS_FILE_CREATE, /*!< to create new file (if
- exists, error) */
- OS_FILE_OVERWRITE, /*!< to create a new file, if exists
- the overwrite old file */
- OS_FILE_OPEN_RAW, /*!< to open a raw device or disk
- partition */
- OS_FILE_CREATE_PATH, /*!< to create the directories */
- OS_FILE_OPEN_RETRY, /*!< open with retry */
-
- /** Flags that can be combined with the above values. Please ensure
- that the above values stay below 128. */
-
- OS_FILE_ON_ERROR_NO_EXIT = 128, /*!< do not exit on unknown errors */
- OS_FILE_ON_ERROR_SILENT = 256 /*!< don't print diagnostic messages to
- the log unless it is a fatal error,
- this flag is only used if
- ON_ERROR_NO_EXIT is set */
+ /** create a new file */
+ OS_FILE_CREATE= 0,
+ /** open an existing file */
+ OS_FILE_OPEN,
+ /** retry opening an existing file */
+ OS_FILE_OPEN_RETRY,
+ /** open a raw block device */
+ OS_FILE_OPEN_RAW,
+
+ /** do not display diagnostic messages */
+ OS_FILE_ON_ERROR_SILENT= 4,
+
+ OS_FILE_CREATE_SILENT= OS_FILE_CREATE | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_OPEN_SILENT= OS_FILE_OPEN | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_OPEN_RETRY_SILENT= OS_FILE_OPEN_RETRY | OS_FILE_ON_ERROR_SILENT
};
static const ulint OS_FILE_READ_ONLY = 333;
@@ -144,7 +152,7 @@ static const ulint OS_FILE_NORMAL = 62;
/** Types for file create @{ */
static constexpr ulint OS_DATA_FILE = 100;
static constexpr ulint OS_LOG_FILE = 101;
-#if defined _WIN32 || defined HAVE_FCNTL_DIRECT
+#if defined _WIN32 || defined O_DIRECT
static constexpr ulint OS_DATA_FILE_NO_O_DIRECT = 103;
#endif
/* @} */
@@ -191,14 +199,10 @@ public:
WRITE_ASYNC= WRITE_SYNC | 1,
/** A doublewrite batch */
DBLWR_BATCH= WRITE_ASYNC | 8,
- /** Write data; evict the block on write completion */
- WRITE_LRU= WRITE_ASYNC | 32,
/** Write data and punch hole for the rest */
- PUNCH= WRITE_ASYNC | 64,
- /** Write data and punch hole; evict the block on write completion */
- PUNCH_LRU= PUNCH | WRITE_LRU,
+ PUNCH= WRITE_ASYNC | 16,
/** Zero out a range of bytes in fil_space_t::io() */
- PUNCH_RANGE= WRITE_SYNC | 128,
+ PUNCH_RANGE= WRITE_SYNC | 32,
};
constexpr IORequest(buf_page_t *bpage, buf_tmp_buffer_t *slot,
@@ -211,7 +215,6 @@ public:
bool is_read() const { return (type & READ_SYNC) != 0; }
bool is_write() const { return (type & WRITE_SYNC) != 0; }
- bool is_LRU() const { return (type & (WRITE_LRU ^ WRITE_ASYNC)) != 0; }
bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; }
void write_complete(int io_error) const;
@@ -349,7 +352,7 @@ A simple function to open or create a file.
pfs_os_file_t
os_file_create_simple_func(
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint access_type,
bool read_only,
bool* success);
@@ -358,7 +361,7 @@ os_file_create_simple_func(
os_file_create_simple_no_error_handling(), not directly this function!
A simple function to open or create a file.
@param[in] name name of the file or path as a null-terminated string
-@param[in] create_mode create mode
+@param[in] create_mode OS_FILE_CREATE or OS_FILE_OPEN
@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option
is used by a backup program reading the file
@@ -369,28 +372,12 @@ A simple function to open or create a file.
pfs_os_file_t
os_file_create_simple_no_error_handling_func(
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint access_type,
bool read_only,
bool* success)
MY_ATTRIBUTE((warn_unused_result));
-#ifndef HAVE_FCNTL_DIRECT
-#define os_file_set_nocache(fd, file_name, operation_name) do{}while(0)
-#else
-/** Tries to disable OS caching on an opened file descriptor.
-@param[in] fd file descriptor to alter
-@param[in] file_name file name, used in the diagnostic message
-@param[in] name "open" or "create"; used in the diagnostic
- message */
-void
-os_file_set_nocache(
-/*================*/
- int fd, /*!< in: file descriptor to alter */
- const char* file_name,
- const char* operation_name);
-#endif
-
#ifndef _WIN32 /* On Microsoft Windows, mandatory locking is used */
/** Obtain an exclusive lock on a file.
@param fd file descriptor
@@ -419,7 +406,7 @@ Opens an existing file or creates a new.
pfs_os_file_t
os_file_create_func(
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint purpose,
ulint type,
bool read_only,
@@ -617,7 +604,7 @@ pfs_os_file_t
pfs_os_file_create_simple_func(
mysql_pfs_key_t key,
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint access_type,
bool read_only,
bool* success,
@@ -633,7 +620,7 @@ monitor file creation/open.
@param[in] key Performance Schema Key
@param[in] name name of the file or path as a null-terminated
string
-@param[in] create_mode create mode
+@param[in] create_mode OS_FILE_CREATE or OS_FILE_OPEN
@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file
@@ -648,7 +635,7 @@ pfs_os_file_t
pfs_os_file_create_simple_no_error_handling_func(
mysql_pfs_key_t key,
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint access_type,
bool read_only,
bool* success,
@@ -681,7 +668,7 @@ pfs_os_file_t
pfs_os_file_create_func(
mysql_pfs_key_t key,
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint purpose,
ulint type,
bool read_only,
diff --git a/storage/innobase/include/os0file.inl b/storage/innobase/include/os0file.inl
index 7de31505..a7603028 100644
--- a/storage/innobase/include/os0file.inl
+++ b/storage/innobase/include/os0file.inl
@@ -45,7 +45,7 @@ pfs_os_file_t
pfs_os_file_create_simple_func(
mysql_pfs_key_t key,
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint access_type,
bool read_only,
bool* success,
@@ -80,7 +80,7 @@ monitor file creation/open.
@param[in] key Performance Schema Key
@param[in] name name of the file or path as a null-terminated
string
-@param[in] create_mode create mode
+@param[in] create_mode OS_FILE_CREATE or OS_FILE_OPEN
@param[in] access_type OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file
@@ -95,7 +95,7 @@ pfs_os_file_t
pfs_os_file_create_simple_no_error_handling_func(
mysql_pfs_key_t key,
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint access_type,
bool read_only,
bool* success,
@@ -146,7 +146,7 @@ pfs_os_file_t
pfs_os_file_create_func(
mysql_pfs_key_t key,
const char* name,
- ulint create_mode,
+ os_file_create_t create_mode,
ulint purpose,
ulint type,
bool read_only,
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index 93ea650d..1c2af128 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -165,14 +165,11 @@ row_merge_drop_indexes(
prepare_inplace_alter_table_dict(). */
void row_merge_drop_temp_indexes();
-/** Create temporary merge files in the given paramater path, and if
-UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
-@param[in] path location for creating temporary merge files, or NULL
+/** Create a temporary file at the specified path.
+@param path location for creating temporary merge files, or nullptr
@return File descriptor */
-pfs_os_file_t
-row_merge_file_create_low(
- const char* path)
- MY_ATTRIBUTE((warn_unused_result));
+pfs_os_file_t row_merge_file_create_low(const char *path)
+ MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
Destroy a merge file. And de-register the file from Performance Schema
if UNIV_PFS_IO is defined. */
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index a1350740..7056c77f 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -370,6 +370,12 @@ row_search_index_entry(
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Get the byte offset of the DB_TRX_ID column
+@param[in] rec clustered index record
+@param[in] index clustered index
+@return the byte offset of DB_TRX_ID, from the start of rec */
+ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index);
+
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index 8134c60f..54e4a1d2 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -115,8 +115,8 @@ row_sel_convert_mysql_key_to_innobase(
ulint buf_len, /*!< in: buffer length */
dict_index_t* index, /*!< in: index of the key value */
const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len); /*!< in: MySQL key value length */
-
+ ulint key_len) /*!< in: MySQL key value length */
+ MY_ATTRIBUTE((nonnull(1,4,5)));
/** Search for rows in the database using cursor.
Function is mainly used for tables that are shared across connections and
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 51f3049b..2ed26748 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -194,7 +194,6 @@ enum monitor_id_t {
MONITOR_FLUSH_ADAPTIVE_AVG_PASS,
MONITOR_LRU_GET_FREE_LOOPS,
- MONITOR_LRU_GET_FREE_WAITS,
MONITOR_FLUSH_AVG_PAGE_RATE,
MONITOR_FLUSH_LSN_AVG_RATE,
@@ -215,7 +214,6 @@ enum monitor_id_t {
MONITOR_LRU_BATCH_SCANNED_PER_CALL,
MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT,
MONITOR_LRU_GET_FREE_SEARCH,
MONITOR_LRU_SEARCH_SCANNED,
MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 457d9ab5..5e6bfc33 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -121,10 +121,6 @@ struct srv_stats_t
ulint_ctr_n_t n_temp_blocks_decrypted;
};
-/** We are prepared for a situation that we have this many threads waiting for
-a transactional lock inside InnoDB. srv_start() sets the value. */
-extern ulint srv_max_n_threads;
-
extern const char* srv_main_thread_op_info;
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h
index 01067322..98c256d3 100644
--- a/storage/innobase/include/srw_lock.h
+++ b/storage/innobase/include/srw_lock.h
@@ -153,7 +153,7 @@ template<bool spinloop> class srw_lock_impl;
/** Slim shared-update-exclusive lock with no recursion */
template<bool spinloop>
-class ssux_lock_impl final
+class ssux_lock_impl
{
#ifdef UNIV_PFS_RWLOCK
friend class ssux_lock;
@@ -550,3 +550,51 @@ typedef srw_lock_impl<false> srw_lock;
typedef srw_lock_impl<true> srw_spin_lock;
#endif
+
+#ifdef UNIV_DEBUG
+# include <unordered_set>
+
+class srw_lock_debug : private srw_lock
+{
+ /** The owner of the exclusive lock (0 if none) */
+ std::atomic<pthread_t> writer;
+ /** Protects readers */
+ mutable srw_mutex readers_lock;
+ /** Threads that hold the lock in shared mode */
+ std::atomic<std::unordered_multiset<pthread_t>*> readers;
+
+ /** Register a read lock. */
+ void readers_register();
+
+public:
+ void SRW_LOCK_INIT(mysql_pfs_key_t key);
+ void destroy();
+
+#ifndef SUX_LOCK_GENERIC
+ /** @return whether any lock may be held by any thread */
+ bool is_locked_or_waiting() const noexcept
+ { return srw_lock::is_locked_or_waiting(); }
+ /** @return whether an exclusive lock may be held by any thread */
+ bool is_write_locked() const noexcept { return srw_lock::is_write_locked(); }
+#endif
+
+ /** Acquire an exclusive lock */
+ void wr_lock(SRW_LOCK_ARGS(const char *file, unsigned line));
+ /** @return whether an exclusive lock was acquired */
+ bool wr_lock_try();
+ /** Release after wr_lock() */
+ void wr_unlock();
+ /** Acquire a shared lock */
+ void rd_lock(SRW_LOCK_ARGS(const char *file, unsigned line));
+ /** @return whether a shared lock was acquired */
+ bool rd_lock_try();
+ /** Release after rd_lock() */
+ void rd_unlock();
+ /** @return whether this thread is between rd_lock() and rd_unlock() */
+ bool have_rd() const noexcept;
+ /** @return whether this thread is between wr_lock() and wr_unlock() */
+ bool have_wr() const noexcept;
+ /** @return whether this thread is holding rd_lock() or wr_lock() */
+ bool have_any() const noexcept;
+};
+#endif
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 0f4f8afa..1fb6cd68 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -55,80 +55,74 @@ Run a purge batch.
@return number of undo log pages handled in the batch */
ulint trx_purge(ulint n_tasks, ulint history_size);
-/** Rollback segements from a given transaction with trx-no
-scheduled for purge. */
-class TrxUndoRsegs {
-private:
- typedef std::vector<trx_rseg_t*, ut_allocator<trx_rseg_t*> >
- trx_rsegs_t;
-public:
- typedef trx_rsegs_t::iterator iterator;
- typedef trx_rsegs_t::const_iterator const_iterator;
-
- TrxUndoRsegs() = default;
-
- /** Constructor */
- TrxUndoRsegs(trx_rseg_t& rseg)
- : trx_no(rseg.last_trx_no()), m_rsegs(1, &rseg) {}
- /** Constructor */
- TrxUndoRsegs(trx_id_t trx_no, trx_rseg_t& rseg)
- : trx_no(trx_no), m_rsegs(1, &rseg) {}
-
- bool operator!=(const TrxUndoRsegs& other) const
- { return trx_no != other.trx_no; }
- bool empty() const { return m_rsegs.empty(); }
- void erase(iterator& it) { m_rsegs.erase(it); }
- iterator begin() { return(m_rsegs.begin()); }
- iterator end() { return(m_rsegs.end()); }
- const_iterator begin() const { return m_rsegs.begin(); }
- const_iterator end() const { return m_rsegs.end(); }
-
- /** Compare two TrxUndoRsegs based on trx_no.
- @param elem1 first element to compare
- @param elem2 second element to compare
- @return true if elem1 > elem2 else false.*/
- bool operator()(const TrxUndoRsegs& lhs, const TrxUndoRsegs& rhs)
- {
- return(lhs.trx_no > rhs.trx_no);
- }
-
- /** Copy of trx_rseg_t::last_trx_no() */
- trx_id_t trx_no= 0;
-private:
- /** Rollback segments of a transaction, scheduled for purge. */
- trx_rsegs_t m_rsegs{};
-};
-
-typedef std::priority_queue<
- TrxUndoRsegs,
- std::vector<TrxUndoRsegs, ut_allocator<TrxUndoRsegs> >,
- TrxUndoRsegs> purge_pq_t;
-
-/** Chooses the rollback segment with the oldest committed transaction */
-struct TrxUndoRsegsIterator {
- /** Constructor */
- TrxUndoRsegsIterator();
- /** Sets the next rseg to purge in purge_sys.
- Executed in the purge coordinator thread.
- @retval false when nothing is to be purged
- @retval true when purge_sys.rseg->latch was locked */
- inline bool set_next();
-
-private:
- // Disable copying
- TrxUndoRsegsIterator(const TrxUndoRsegsIterator&);
- TrxUndoRsegsIterator& operator=(const TrxUndoRsegsIterator&);
-
- /** The current element to process */
- TrxUndoRsegs m_rsegs;
- /** Track the current element in m_rsegs */
- TrxUndoRsegs::const_iterator m_iter;
-};
-
/** The control structure used in the purge operation */
class purge_sys_t
{
- friend TrxUndoRsegsIterator;
+ /** Min-heap based priority queue of (trx_no, trx_sys.rseg_array index)
+ pairs, ordered on trx_no. The highest 64-TRX_NO_SHIFT bits of each element
+ are trx_no; the lowest 8 bits are rseg's index in trx_sys.rseg_array. */
+ class purge_queue
+ {
+ public:
+ typedef std::vector<uint64_t, ut_allocator<uint64_t>> container_type;
+ /** Number of bits reserved to shift trx_no in purge queue element */
+ static constexpr unsigned TRX_NO_SHIFT= 8;
+
+ bool empty() const { return m_array.empty(); }
+ void clear() { m_array.clear(); }
+
+ /** Push (trx_no, trx_sys.rseg_array index) into min-heap.
+ @param trx_no_rseg (trx_no << TRX_NO_SHIFT | (trx_sys.rseg_array index)) */
+ void push_trx_no_rseg(container_type::value_type trx_no_rseg)
+ {
+ m_array.push_back(trx_no_rseg);
+ std::push_heap(m_array.begin(), m_array.end(),
+ std::greater<container_type::value_type>());
+ }
+
+ /** Push rseg to priority queue.
+ @param trx_no trx_no of committed transaction
+ @param rseg rseg of committed transaction*/
+ void push(trx_id_t trx_no, const trx_rseg_t &rseg)
+ {
+ ut_ad(trx_no < 1ULL << (DATA_TRX_ID_LEN * CHAR_BIT));
+ ut_ad(&rseg >= trx_sys.rseg_array);
+ ut_ad(&rseg < trx_sys.rseg_array + TRX_SYS_N_RSEGS);
+ push_trx_no_rseg(trx_no << TRX_NO_SHIFT |
+ byte(&rseg - trx_sys.rseg_array));
+ }
+
+ /** Extracts rseg from (trx_no, trx_sys.rseg_array index) pair.
+ @param trx_no_rseg (trx_no << TRX_NO_SHIFT | (trx_sys.rseg_array index))
+ @return pointer to rseg in trx_sys.rseg_array */
+ static trx_rseg_t *rseg(container_type::value_type trx_no_rseg) {
+ byte i= static_cast<byte>(trx_no_rseg);
+ ut_ad(i < TRX_SYS_N_RSEGS);
+ return &trx_sys.rseg_array[i];
+ }
+
+ /** Pop rseg from priority queue.
+ @return pointer to popped trx_rseg_t object */
+ trx_rseg_t *pop()
+ {
+ ut_ad(!empty());
+ std::pop_heap(m_array.begin(), m_array.end(),
+ std::greater<container_type::value_type>());
+ trx_rseg_t *r = rseg(m_array.back());
+ m_array.pop_back();
+ return r;
+ }
+
+ /** Clone m_array.
+ @return m_array clone */
+ container_type clone_container() const{ return m_array; }
+
+ private:
+ /** Array of (trx_no, trx_sys.rseg_array index) pairs. */
+ container_type m_array;
+ };
+
+
public:
/** latch protecting view, m_enabled */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mutable srw_spin_lock latch;
@@ -244,15 +238,36 @@ private:
record */
uint16_t hdr_offset; /*!< Header byte offset on the page */
+ /** Binary min-heap of (trx_no, trx_sys.rseg_array index) pairs, ordered on
+ trx_no. It is protected by the pq_mutex */
+ purge_queue purge_queue;
+
+ /** Mutex protecting purge_queue */
+ mysql_mutex_t pq_mutex;
- TrxUndoRsegsIterator
- rseg_iter; /*!< Iterator to get the next rseg
- to process */
public:
- purge_pq_t purge_queue; /*!< Binary min-heap, ordered on
- TrxUndoRsegs::trx_no. It is protected
- by the pq_mutex */
- mysql_mutex_t pq_mutex; /*!< Mutex protecting purge_queue */
+
+ void enqueue(trx_id_t trx_no, const trx_rseg_t &rseg) {
+ mysql_mutex_assert_owner(&pq_mutex);
+ purge_queue.push(trx_no, rseg);
+ }
+
+ /** Push to purge queue; the caller must already hold pq_mutex.
+ @param rseg rseg to push */
+ void enqueue(const trx_rseg_t &rseg) { enqueue(rseg.last_trx_no(), rseg); }
+
+ /** Clone purge queue container.
+ @return purge queue container clone */
+ purge_queue::container_type clone_queue_container() const {
+ mysql_mutex_assert_owner(&pq_mutex);
+ return purge_queue.clone_container();
+ }
+
+ /** Acquire the purge queue mutex (pq_mutex) */
+ void queue_lock() { mysql_mutex_lock(&pq_mutex); }
+
+ /** Release purge queue mutex */
+ void queue_unlock() { mysql_mutex_unlock(&pq_mutex); }
/** innodb_undo_log_truncate=ON state;
only modified by purge_coordinator_callback() */
@@ -332,8 +347,9 @@ private:
/** Update the last not yet purged history log info in rseg when
we have purged a whole undo log. Advances also purge_trx_no
- past the purged log. */
- void rseg_get_next_history_log();
+ past the purged log.
+ @return whether anything is to be purged */
+ bool rseg_get_next_history_log();
public:
/**
@@ -438,6 +454,11 @@ public:
@param already_stopped True indicates purge threads were
already stopped */
void stop_FTS(const dict_table_t &table, bool already_stopped=false);
+
+ /** Cleanse the purge queue by removing the rsegs that reside in an
+ undo tablespace marked for truncation.
+ @param space undo tablespace being truncated */
+ void cleanse_purge_queue(const fil_space_t &space);
};
/** The global data structure coordinating a purge */
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 7fa43047..e0051b2a 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -59,7 +59,7 @@ struct alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_rseg_t
/** tablespace containing the rollback segment; constant after init() */
fil_space_t *space;
/** latch protecting everything except page_no, space */
- srw_spin_lock latch;
+ IF_DBUG(srw_lock_debug,srw_spin_lock) latch;
/** rollback segment header page number; constant after init() */
uint32_t page_no;
/** length of the TRX_RSEG_HISTORY list (number of transactions) */
@@ -170,19 +170,21 @@ public:
/** Last not yet purged undo log header; FIL_NULL if all purged */
uint32_t last_page_no;
- /** trx_t::no | last_offset << 48 */
+ /** trx_t::no << 16 | last_offset */
uint64_t last_commit_and_offset;
/** @return the commit ID of the last committed transaction */
trx_id_t last_trx_no() const
- { return last_commit_and_offset & ((1ULL << 48) - 1); }
+ { return last_commit_and_offset >> 16; }
/** @return header offset of the last committed transaction */
uint16_t last_offset() const
- { return static_cast<uint16_t>(last_commit_and_offset >> 48); }
+ {
+ return static_cast<uint16_t>(last_commit_and_offset);
+ }
void set_last_commit(uint16_t last_offset, trx_id_t trx_no)
{
- last_commit_and_offset= static_cast<uint64_t>(last_offset) << 48 | trx_no;
+ last_commit_and_offset= trx_no << 16 | static_cast<uint64_t>(last_offset);
}
/** @return the page identifier */
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 0a3e0d62..15255354 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -785,13 +785,19 @@ public:
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
- uint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
- bool check_foreigns; /*!< normally TRUE, but if the user
- wants to suppress foreign key checks,
- (in table imports, for example) we
- set this FALSE */
+ /** TRX_ISO_REPEATABLE_READ, ... */
+ unsigned isolation_level:2;
+ /** when set, REPEATABLE READ will actually be Snapshot Isolation, due to
+ detecting write/write conflicts and disabling "semi-consistent read" */
+ unsigned snapshot_isolation:1;
+ /** normally set; "SET foreign_key_checks=0" can be issued to suppress
+ foreign key checks, in table imports, for example */
+ unsigned check_foreigns:1;
+ /** normally set; "SET unique_checks=0, foreign_key_checks=0"
+ enables bulk insert into an empty table */
+ unsigned check_unique_secondary:1;
/** whether an insert into an empty table is active */
- bool bulk_insert;
+ unsigned bulk_insert:1;
/*------------------------------*/
/* MySQL has a transaction coordinator to coordinate two phase
commit between multiple storage engines and the binary log. When
@@ -805,13 +811,6 @@ public:
/** whether this is holding the prepare mutex */
bool active_commit_ordered;
/*------------------------------*/
- bool check_unique_secondary;
- /*!< normally TRUE, but if the user
- wants to speed up inserts by
- suppressing unique key checks
- for secondary indexes when we decide
- if we can use the insert buffer for
- them, we set this FALSE */
bool flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
@@ -1189,10 +1188,16 @@ public:
return UNIV_UNLIKELY(bulk_insert) ? bulk_insert_apply_low(): DB_SUCCESS;
}
+ /** Do the bulk insert for the buffered insert operation of a table.
+ @param table bulk insert operation
+ @return DB_SUCCESS or error code. */
+ dberr_t bulk_insert_apply_for_table(dict_table_t *table);
private:
/** Apply the buffered bulk inserts. */
dberr_t bulk_insert_apply_low();
+ /** Rollback the bulk insert operation for the transaction */
+ void bulk_rollback_low();
/** Assign a rollback segment for modifying temporary tables.
@return the assigned rollback segment */
trx_rseg_t *assign_temp_rseg();
diff --git a/storage/innobase/include/trx0undo.inl b/storage/innobase/include/trx0undo.inl
index 9f05989f..023e2b98 100644
--- a/storage/innobase/include/trx0undo.inl
+++ b/storage/innobase/include/trx0undo.inl
@@ -125,5 +125,6 @@ trx_undo_page_get_next_rec(const buf_block_t *undo_page, uint16_t rec,
{
uint16_t end= trx_undo_page_get_end(undo_page, page_no, offset);
uint16_t next= mach_read_from_2(undo_page->page.frame + rec);
- return next == end ? nullptr : undo_page->page.frame + next;
+ ut_ad(next <= end);
+ return next >= end ? nullptr : undo_page->page.frame + next;
}
diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h
index f4183e4c..3ff5f885 100644
--- a/storage/innobase/include/ut0new.h
+++ b/storage/innobase/include/ut0new.h
@@ -1071,9 +1071,8 @@ static inline void *ut_malloc_dontdump(size_t n_bytes, ...)
{
void *ptr = my_large_malloc(&n_bytes, MYF(0));
- ut_dontdump(ptr, n_bytes, true);
-
if (ptr) {
+ ut_dontdump(ptr, n_bytes, true);
os_total_large_mem_allocated += n_bytes;
}
return ptr;
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index fe16ce14..500b6455 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -242,20 +242,6 @@ ut_print_name(
FILE* ef, /*!< in: stream */
const trx_t* trx, /*!< in: transaction */
const char* name); /*!< in: table name to print */
-/** Format a table name, quoted as an SQL identifier.
-If the name contains a slash '/', the result will contain two
-identifiers separated by a period (.), as in SQL
-database_name.table_name.
-@see table_name_t
-@param[in] name table or index name
-@param[out] formatted formatted result, will be NUL-terminated
-@param[in] formatted_size size of the buffer in bytes
-@return pointer to 'formatted' */
-char*
-ut_format_name(
- const char* name,
- char* formatted,
- ulint formatted_size);
/**********************************************************************//**
Catenate files. */
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
index f4660f96..ad43e1c8 100644
--- a/storage/innobase/include/ut0vec.h
+++ b/storage/innobase/include/ut0vec.h
@@ -201,15 +201,6 @@ ib_vector_last_const(
const ib_vector_t* vec); /* in: vector */
/********************************************************************
-Sort the vector elements. */
-UNIV_INLINE
-void
-ib_vector_sort(
-/*===========*/
- ib_vector_t* vec, /* in/out: vector */
- ib_compare_t compare); /* in: the comparator to use for sort */
-
-/********************************************************************
The default ib_vector_t heap free. Does nothing. */
UNIV_INLINE
void
diff --git a/storage/innobase/include/ut0vec.inl b/storage/innobase/include/ut0vec.inl
index 531f0f22..1a844dd8 100644
--- a/storage/innobase/include/ut0vec.inl
+++ b/storage/innobase/include/ut0vec.inl
@@ -305,19 +305,6 @@ ib_vector_remove(
}
/********************************************************************
-Sort the vector elements. */
-UNIV_INLINE
-void
-ib_vector_sort(
-/*===========*/
- /* out: void */
- ib_vector_t* vec, /* in: vector */
- ib_compare_t compare)/* in: the comparator to use for sort */
-{
- qsort(vec->data, vec->used, vec->sizeof_value, compare);
-}
-
-/********************************************************************
Destroy the vector. Make sure the vector owns the allocator, e.g.,
the heap in the the heap allocator. */
UNIV_INLINE