author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 12:24:36 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 12:24:36 +0000
commit     06eaf7232e9a920468c0f8d74dcf2fe8b555501c
tree       e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/lock
parent     Initial commit.
Adding upstream version 1:10.11.6.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/lock')

-rw-r--r--  storage/innobase/lock/lock0iter.cc    88
-rw-r--r--  storage/innobase/lock/lock0lock.cc  6812
-rw-r--r--  storage/innobase/lock/lock0prdt.cc   928

3 files changed, 7828 insertions(+), 0 deletions(-)
diff --git a/storage/innobase/lock/lock0iter.cc b/storage/innobase/lock/lock0iter.cc
new file mode 100644
index 00000000..0cd271bf
--- /dev/null
+++ b/storage/innobase/lock/lock0iter.cc
@@ -0,0 +1,88 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2020, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file lock/lock0iter.cc
+Lock queue iterator. Can iterate over table and record
+lock queues.
+
+Created July 16, 2007 Vasil Dimov
+*******************************************************/
+
+#define LOCK_MODULE_IMPLEMENTATION
+
+#include "dict0mem.h"
+#include "lock0iter.h"
+#include "lock0lock.h"
+#include "lock0priv.h"
+
+/*******************************************************************//**
+Initialize lock queue iterator so that it starts to iterate from
+"lock". bit_no specifies the record number within the heap where the
+record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
+1. If the lock is a table lock, i.e. we have a table lock queue;
+2. If the lock is a record lock and it is a wait lock. In this case
+   bit_no is calculated in this function by using
+   lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
+   of a wait lock. */
+void
+lock_queue_iterator_reset(
+/*======================*/
+	lock_queue_iterator_t*	iter,	/*!< out: iterator */
+	const lock_t*		lock,	/*!< in: lock to start from */
+	ulint			bit_no)	/*!< in: record number in the
+					heap */
+{
+  lock_sys.assert_locked(*lock);
+
+  iter->current_lock = lock;
+
+  if (bit_no != ULINT_UNDEFINED);
+  else if (lock->is_table())
+    bit_no= ULINT_UNDEFINED;
+  else
+  {
+    bit_no= lock_rec_find_set_bit(lock);
+    ut_ad(bit_no != ULINT_UNDEFINED);
+  }
+
+  iter->bit_no= bit_no;
+}
+
+/*******************************************************************//**
+Gets the previous lock in the lock queue; returns NULL if there are no
+more locks (i.e. the current lock is the first one). The iterator is
+moved back to the returned lock if non-NULL is returned.
+@return previous lock or NULL */
+const lock_t*
+lock_queue_iterator_get_prev(
+/*=========================*/
+	lock_queue_iterator_t*	iter)	/*!< in/out: iterator */
+{
+  lock_sys.assert_locked(*iter->current_lock);
+
+  const lock_t *prev_lock= !iter->current_lock->is_table()
+    ?
lock_rec_get_prev(iter->current_lock, iter->bit_no) + : UT_LIST_GET_PREV(un_member.tab_lock.locks, iter->current_lock); + + if (prev_lock) + iter->current_lock= prev_lock; + + return prev_lock; +} diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc new file mode 100644 index 00000000..df51ceb1 --- /dev/null +++ b/storage/innobase/lock/lock0lock.cc @@ -0,0 +1,6812 @@ +/***************************************************************************** + +Copyright (c) 1996, 2022, Oracle and/or its affiliates. +Copyright (c) 2014, 2023, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0lock.cc +The transaction lock system + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "univ.i" + +#include <mysql/service_thd_error_context.h> +#include <mysql/service_thd_wait.h> +#include <sql_class.h> + +#include "lock0lock.h" +#include "lock0priv.h" +#include "dict0mem.h" +#include "trx0purge.h" +#include "trx0sys.h" +#include "ut0vec.h" +#include "btr0cur.h" +#include "row0sel.h" +#include "row0mysql.h" +#include "row0vers.h" +#include "pars0pars.h" +#include "srv0mon.h" +#include "que0que.h" +#include "scope.h" +#include <debug_sync.h> + +#include <set> + +#ifdef WITH_WSREP +#include <mysql/service_wsrep.h> +#endif /* WITH_WSREP */ + +/** The value of innodb_deadlock_detect */ +my_bool innodb_deadlock_detect; +/** The value of innodb_deadlock_report */ +ulong innodb_deadlock_report; + +#ifdef HAVE_REPLICATION +extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd); +extern "C" int thd_need_wait_reports(const MYSQL_THD thd); +extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); +extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); +#endif + +/** Functor for accessing the embedded node within a table lock. */ +struct TableLockGetNode +{ + ut_list_node<lock_t> &operator()(lock_t &elem) + { return(elem.un_member.tab_lock.locks); } +}; + +/** Create the hash table. +@param n the lower bound of n_cells */ +void lock_sys_t::hash_table::create(ulint n) +{ + n_cells= ut_find_prime(n); + const size_t size= MY_ALIGN(pad(n_cells) * sizeof *array, + CPU_LEVEL1_DCACHE_LINESIZE); + void *v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); + memset_aligned<CPU_LEVEL1_DCACHE_LINESIZE>(v, 0, size); + array= static_cast<hash_cell_t*>(v); +} + +/** Resize the hash table. 
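  [Aside (editor's illustrative sketch, not part of this file): the rehash
  loop in this function re-inserts every record lock so that granted locks
  stay ahead of waiting locks within their new cell. The per-lock step,
  restated over a minimal node type:

      struct lk { lk *hash; bool waiting; };

      void reinsert(lk *&cell_head, lk *lock)
      {
        lock->hash= nullptr;
        if (!cell_head)
          cell_head= lock;          // first lock in this cell
        else if (!lock->waiting)
        {
          lock->hash= cell_head;    // granted lock: prepend
          cell_head= lock;
        }
        else
        {
          lk *tail= cell_head;      // waiting lock: append at the tail,
          while (tail->hash)        // behind all granted locks
            tail= tail->hash;
          tail->hash= lock;
        }
      }
  ]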
+@param n the lower bound of n_cells */ +void lock_sys_t::hash_table::resize(ulint n) +{ + ut_ad(lock_sys.is_writer()); + ulint new_n_cells= ut_find_prime(n); + const size_t size= MY_ALIGN(pad(new_n_cells) * sizeof *array, + CPU_LEVEL1_DCACHE_LINESIZE); + void *v= aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); + memset_aligned<CPU_LEVEL1_DCACHE_LINESIZE>(v, 0, size); + hash_cell_t *new_array= static_cast<hash_cell_t*>(v); + + for (auto i= pad(n_cells); i--; ) + { + if (lock_t *lock= static_cast<lock_t*>(array[i].node)) + { + /* all hash_latch must vacated */ + ut_ad(i % (ELEMENTS_PER_LATCH + LATCH) >= LATCH); + do + { + ut_ad(!lock->is_table()); + hash_cell_t *c= calc_hash(lock->un_member.rec_lock.page_id.fold(), + new_n_cells) + new_array; + lock_t *next= lock->hash; + lock->hash= nullptr; + if (!c->node) + c->node= lock; + else if (!lock->is_waiting()) + { + lock->hash= static_cast<lock_t*>(c->node); + c->node= lock; + } + else + { + lock_t *next= static_cast<lock_t*>(c->node); + while (next->hash) + next= next->hash; + next->hash= lock; + } + lock= next; + } + while (lock); + } + } + + aligned_free(array); + array= new_array; + n_cells= new_n_cells; +} + +#ifdef SUX_LOCK_GENERIC +void lock_sys_t::hash_latch::wait() +{ + pthread_mutex_lock(&lock_sys.hash_mutex); + while (!write_trylock()) + pthread_cond_wait(&lock_sys.hash_cond, &lock_sys.hash_mutex); + pthread_mutex_unlock(&lock_sys.hash_mutex); +} + +void lock_sys_t::hash_latch::release() +{ + pthread_mutex_lock(&lock_sys.hash_mutex); + write_unlock(); + pthread_cond_signal(&lock_sys.hash_cond); + pthread_mutex_unlock(&lock_sys.hash_mutex); +} +#endif + +#ifdef UNIV_DEBUG +/** Assert that a lock shard is exclusively latched by this thread */ +void lock_sys_t::assert_locked(const lock_t &lock) const +{ + ut_ad(this == &lock_sys); + if (is_writer()) + return; + if (lock.is_table()) + assert_locked(*lock.un_member.tab_lock.table); + else + lock_sys.hash_get(lock.type_mode). 
+ assert_locked(lock.un_member.rec_lock.page_id); +} + +/** Assert that a table lock shard is exclusively latched by this thread */ +void lock_sys_t::assert_locked(const dict_table_t &table) const +{ + ut_ad(!table.is_temporary()); + if (is_writer()) + return; + ut_ad(readers); + ut_ad(table.lock_mutex_is_owner()); +} + +/** Assert that hash cell for page is exclusively latched by this thread */ +void lock_sys_t::hash_table::assert_locked(const page_id_t id) const +{ + if (lock_sys.is_writer()) + return; + ut_ad(lock_sys.readers); + ut_ad(latch(cell_get(id.fold()))->is_locked()); +} + +/** Assert that a hash table cell is exclusively latched (by some thread) */ +void lock_sys_t::assert_locked(const hash_cell_t &cell) const +{ + if (is_writer()) + return; + ut_ad(lock_sys.readers); + ut_ad(hash_table::latch(const_cast<hash_cell_t*>(&cell))->is_locked()); +} +#endif + +LockGuard::LockGuard(lock_sys_t::hash_table &hash, page_id_t id) +{ + const auto id_fold= id.fold(); + lock_sys.rd_lock(SRW_LOCK_CALL); + cell_= hash.cell_get(id_fold); + hash.latch(cell_)->acquire(); +} + +LockMultiGuard::LockMultiGuard(lock_sys_t::hash_table &hash, + const page_id_t id1, const page_id_t id2) +{ + ut_ad(id1.space() == id2.space()); + const auto id1_fold= id1.fold(), id2_fold= id2.fold(); + lock_sys.rd_lock(SRW_LOCK_CALL); + cell1_= hash.cell_get(id1_fold); + cell2_= hash.cell_get(id2_fold); + + auto latch1= hash.latch(cell1_), latch2= hash.latch(cell2_); + if (latch1 > latch2) + std::swap(latch1, latch2); + latch1->acquire(); + if (latch1 != latch2) + latch2->acquire(); +} + +LockMultiGuard::~LockMultiGuard() +{ + auto latch1= lock_sys_t::hash_table::latch(cell1_), + latch2= lock_sys_t::hash_table::latch(cell2_); + latch1->release(); + if (latch1 != latch2) + latch2->release(); + /* Must be last, to avoid a race with lock_sys_t::hash_table::resize() */ + lock_sys.rd_unlock(); +} + +TRANSACTIONAL_TARGET +TMLockGuard::TMLockGuard(lock_sys_t::hash_table &hash, page_id_t id) +{ + const auto id_fold= id.fold(); +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (lock_sys.latch.is_write_locked()) + xabort(); + cell_= hash.cell_get(id_fold); + if (hash.latch(cell_)->is_locked()) + xabort(); + elided= true; + return; + } + elided= false; +#endif + lock_sys.rd_lock(SRW_LOCK_CALL); + cell_= hash.cell_get(id_fold); + hash.latch(cell_)->acquire(); +} + +/** Pretty-print a table lock. +@param[in,out] file output stream +@param[in] lock table lock */ +static void lock_table_print(FILE* file, const lock_t* lock); + +/** Pretty-print a record lock. +@param[in,out] file output stream +@param[in] lock record lock +@param[in,out] mtr mini-transaction for accessing the record */ +static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr); + +namespace Deadlock +{ + /** Whether to_check may be nonempty */ + static Atomic_relaxed<bool> to_be_checked; + /** Transactions to check for deadlock. Protected by lock_sys.wait_mutex. */ + static std::set<trx_t*> to_check; + + MY_ATTRIBUTE((nonnull, warn_unused_result)) + /** Check if a lock request results in a deadlock. + Resolve a deadlock by choosing a transaction that will be rolled back. 
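  [Aside (editor's illustrative sketch, not part of this file): the
  LockMultiGuard constructor above acquires its two cell latches in
  address order, the standard way to impose one global locking order and
  rule out latch deadlock. The idiom, with std::mutex standing in for
  the real hash_latch type:

      #include <mutex>
      #include <utility>

      void lock_pair(std::mutex &a, std::mutex &b)
      {
        std::mutex *first= &a, *second= &b;
        if (first > second)
          std::swap(first, second); // lower address always first
        first->lock();
        if (first != second)        // both page ids may map to one latch
          second->lock();
      }

  Because every thread orders its acquisitions the same way, two threads
  latching the same pair of cells cannot wait on each other cyclically.]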
+ @param trx transaction requesting a lock + @param wait_lock the lock being requested + @return the lock that trx is or was waiting for + @retval nullptr if the lock wait was resolved + @retval -1 if trx must report DB_DEADLOCK */ + static lock_t *check_and_resolve(trx_t *trx, lock_t *wait_lock); + + /** Quickly detect a deadlock using Brent's cycle detection algorithm. + @param trx transaction that is waiting for another transaction + @return a transaction that is part of a cycle + @retval nullptr if no cycle was found */ + inline trx_t *find_cycle(trx_t *trx) + { + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + trx_t *tortoise= trx, *hare= trx; + for (unsigned power= 1, l= 1; (hare= hare->lock.wait_trx) != nullptr; l++) + { + if (tortoise == hare) + { + ut_ad(l > 1); + lock_sys.deadlocks++; + /* Note: Normally, trx should be part of any deadlock cycle + that is found. However, if innodb_deadlock_detect=OFF had been + in effect in the past, it is possible that trx will be waiting + for a transaction that participates in a pre-existing deadlock + cycle. In that case, our victim will not be trx. */ + return hare; + } + if (l == power) + { + /* The maximum concurrent number of TRX_STATE_ACTIVE transactions + is TRX_RSEG_N_SLOTS * 128, or innodb_page_size / 16 * 128 + (default: 131,072, maximum: 524,288). + Our maximum possible number of iterations should be twice that. */ + power<<= 1; + l= 0; + tortoise= hare; + } + } + return nullptr; + } +}; + +#ifdef UNIV_DEBUG +/** Validate the transactional locks. */ +static void lock_validate(); + +/** Validate the record lock queues on a page. +@param block buffer pool block +@param latched whether the tablespace latch may be held +@return true if ok */ +static bool lock_rec_validate_page(const buf_block_t *block, bool latched) + MY_ATTRIBUTE((nonnull, warn_unused_result)); +#endif /* UNIV_DEBUG */ + +/* The lock system */ +lock_sys_t lock_sys; + +/** Only created if !srv_read_only_mode. Protected by lock_sys.latch. */ +static FILE *lock_latest_err_file; + +/*********************************************************************//** +Reports that a transaction id is insensible, i.e., in the future. */ +ATTRIBUTE_COLD +void +lock_report_trx_id_insanity( +/*========================*/ + trx_id_t trx_id, /*!< in: trx id */ + const rec_t* rec, /*!< in: user record */ + dict_index_t* index, /*!< in: index */ + const rec_offs* offsets, /*!< in: rec_get_offsets(rec, index) */ + trx_id_t max_trx_id) /*!< in: trx_sys.get_max_trx_id() */ +{ + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!rec_is_metadata(rec, *index)); + + ib::error() + << "Transaction id " << ib::hex(trx_id) + << " associated with record" << rec_offsets_print(rec, offsets) + << " in index " << index->name + << " of table " << index->table->name + << " is greater than the global counter " << max_trx_id + << "! The table is corrupted."; +} + +/*********************************************************************//** +Checks that a transaction id is sensible, i.e., not in the future. 
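  [Aside (editor's illustrative sketch, not part of this file):
  Deadlock::find_cycle() above is Brent's cycle-detection algorithm on
  the wait-for graph, in which each transaction has at most one
  successor (lock.wait_trx). Restated over a bare linked node type:

      struct node { node *next; };

      node *find_cycle_brent(node *start)
      {
        node *tortoise= start, *hare= start;
        for (unsigned power= 1, l= 1; (hare= hare->next) != nullptr; l++)
        {
          if (tortoise == hare)
            return hare;        // the hare is on a cycle
          if (l == power)       // step budget exhausted:
          {
            power<<= 1;         // double the budget,
            l= 0;
            tortoise= hare;     // and teleport the tortoise to the hare
          }
        }
        return nullptr;         // chain ended: no cycle reachable
      }

  The hare advances one edge per iteration, so the total work stays
  linear in the distance to the cycle plus the cycle length.]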
+@return true if ok */ +bool +lock_check_trx_id_sanity( +/*=====================*/ + trx_id_t trx_id, /*!< in: trx id */ + const rec_t* rec, /*!< in: user record */ + dict_index_t* index, /*!< in: index */ + const rec_offs* offsets) /*!< in: rec_get_offsets(rec, index) */ +{ + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!rec_is_metadata(rec, *index)); + + trx_id_t max_trx_id= trx_sys.get_max_trx_id(); + ut_ad(max_trx_id || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN); + + if (UNIV_LIKELY(max_trx_id != 0) && UNIV_UNLIKELY(trx_id >= max_trx_id)) + { + lock_report_trx_id_insanity(trx_id, rec, index, offsets, max_trx_id); + return false; + } + return true; +} + + +/** + Creates the lock system at database start. + + @param[in] n_cells number of slots in lock hash table +*/ +void lock_sys_t::create(ulint n_cells) +{ + ut_ad(this == &lock_sys); + ut_ad(!is_initialised()); + + m_initialised= true; + + latch.SRW_LOCK_INIT(lock_latch_key); +#ifdef __aarch64__ + mysql_mutex_init(lock_wait_mutex_key, &wait_mutex, MY_MUTEX_INIT_FAST); +#else + mysql_mutex_init(lock_wait_mutex_key, &wait_mutex, nullptr); +#endif +#ifdef SUX_LOCK_GENERIC + pthread_mutex_init(&hash_mutex, nullptr); + pthread_cond_init(&hash_cond, nullptr); +#endif + + rec_hash.create(n_cells); + prdt_hash.create(n_cells); + prdt_page_hash.create(n_cells); + + if (!srv_read_only_mode) + { + lock_latest_err_file= os_file_create_tmpfile(); + ut_a(lock_latest_err_file); + } +} + +#ifdef UNIV_PFS_RWLOCK +/** Acquire exclusive lock_sys.latch */ +void lock_sys_t::wr_lock(const char *file, unsigned line) +{ + mysql_mutex_assert_not_owner(&wait_mutex); + latch.wr_lock(file, line); + ut_ad(!writer.exchange(pthread_self(), std::memory_order_relaxed)); +} +/** Release exclusive lock_sys.latch */ +void lock_sys_t::wr_unlock() +{ + ut_ad(writer.exchange(0, std::memory_order_relaxed) == + pthread_self()); + latch.wr_unlock(); +} + +/** Acquire shared lock_sys.latch */ +void lock_sys_t::rd_lock(const char *file, unsigned line) +{ + mysql_mutex_assert_not_owner(&wait_mutex); + latch.rd_lock(file, line); + ut_ad(!writer.load(std::memory_order_relaxed)); + ut_d(readers.fetch_add(1, std::memory_order_relaxed)); +} + +/** Release shared lock_sys.latch */ +void lock_sys_t::rd_unlock() +{ + ut_ad(!writer.load(std::memory_order_relaxed)); + ut_ad(readers.fetch_sub(1, std::memory_order_relaxed)); + latch.rd_unlock(); +} +#endif + +/** + Resize the lock hash table. + + @param[in] n_cells number of slots in lock hash table +*/ +void lock_sys_t::resize(ulint n_cells) +{ + ut_ad(this == &lock_sys); + /* Buffer pool resizing is rarely initiated by the user, and this + would exceed the maximum size of a memory transaction. */ + LockMutexGuard g{SRW_LOCK_CALL}; + rec_hash.resize(n_cells); + prdt_hash.resize(n_cells); + prdt_page_hash.resize(n_cells); +} + +/** Closes the lock system at database shutdown. 
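  [Aside (editor's illustrative sketch, not part of this file):
  lock_sys_t::create() and resize() above pick the cell count with
  ut_find_prime(), so a page's lock list is found by folding its page id
  and reducing the fold modulo a prime, which spreads sequential page
  numbers across cells. Schematically:

      size_t cell_index(uint64_t fold, size_t n_cells_prime)
      {
        return fold % n_cells_prime;
      }

  The array is additionally padded and cache-line aligned so that each
  group of cells sits next to its hash_latch (see the ELEMENTS_PER_LATCH
  arithmetic in the resize loop above).]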
*/ +void lock_sys_t::close() +{ + ut_ad(this == &lock_sys); + + if (!m_initialised) + return; + + if (lock_latest_err_file) + { + my_fclose(lock_latest_err_file, MYF(MY_WME)); + lock_latest_err_file= nullptr; + } + + rec_hash.free(); + prdt_hash.free(); + prdt_page_hash.free(); +#ifdef SUX_LOCK_GENERIC + pthread_mutex_destroy(&hash_mutex); + pthread_cond_destroy(&hash_cond); +#endif + + latch.destroy(); + mysql_mutex_destroy(&wait_mutex); + + Deadlock::to_check.clear(); + Deadlock::to_be_checked= false; + + m_initialised= false; +} + +#ifdef WITH_WSREP +# ifdef UNIV_DEBUG +/** Check if both conflicting lock transaction and other transaction +requesting record lock are brute force (BF). If they are check is +this BF-BF wait correct and if not report BF wait and assert. + +@param[in] lock_rec other waiting record lock +@param[in] trx trx requesting conflicting record lock +*/ +static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx) +{ + ut_ad(!lock->is_table()); + lock_sys.assert_locked(*lock); + trx_t* lock_trx= lock->trx; + + /* Note that we are holding lock_sys.latch, thus we should + not acquire THD::LOCK_thd_data mutex below to avoid latching + order violation. */ + + if (!trx->is_wsrep() || !lock_trx->is_wsrep()) + return; + if (UNIV_LIKELY(!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) + || UNIV_LIKELY(!wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE))) + return; + + ut_ad(trx->state == TRX_STATE_ACTIVE); + + switch (lock_trx->state) { + case TRX_STATE_COMMITTED_IN_MEMORY: + /* The state change is only protected by trx_t::mutex, + which we are not even holding here. */ + case TRX_STATE_PREPARED: + /* Wait for lock->trx to complete the commit + (or XA ROLLBACK) and to release the lock. */ + return; + case TRX_STATE_ACTIVE: + break; + default: + ut_ad("invalid state" == 0); + } + + /* If BF - BF order is honored, i.e. trx already holding + record lock should be ordered before this new lock request + we can keep trx waiting for the lock. If conflicting + transaction is already aborting or rolling back for replaying + we can also let new transaction waiting. */ + if (wsrep_thd_order_before(lock_trx->mysql_thd, trx->mysql_thd) + || wsrep_thd_is_aborting(lock_trx->mysql_thd)) { + return; + } + + mtr_t mtr; + + ib::error() << "Conflicting lock on table: " + << lock->index->table->name + << " index: " + << lock->index->name() + << " that has lock "; + lock_rec_print(stderr, lock, mtr); + + ib::error() << "WSREP state: "; + + wsrep_report_bf_lock_wait(trx->mysql_thd, + trx->id); + wsrep_report_bf_lock_wait(lock_trx->mysql_thd, + lock_trx->id); + /* BF-BF wait is a bug */ + ut_error; +} +# endif /* UNIV_DEBUG */ + +/** check if lock timeout was for priority thread, +as a side effect trigger lock monitor +@param trx transaction owning the lock +@return false for regular lock timeout */ +ATTRIBUTE_NOINLINE static bool wsrep_is_BF_lock_timeout(const trx_t &trx) +{ + ut_ad(trx.is_wsrep()); + + if (trx.error_state == DB_DEADLOCK || !srv_monitor_timer || + !wsrep_thd_is_BF(trx.mysql_thd, false)) + return false; + + ib::info() << "WSREP: BF lock wait long for trx:" << ib::hex(trx.id) + << " query: " << wsrep_thd_query(trx.mysql_thd); + return true; +} +#endif /* WITH_WSREP */ + +/*********************************************************************//** +Checks if a lock request for a new lock has to wait for request lock2. 
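  [Aside (editor's note, not part of this file): the first test below is
  lock_mode_compatible(), which consults InnoDB's lock compatibility
  matrix (documented in lock0priv.h); only incompatible pairs reach the
  gap and insert-intention refinements that follow. Rows are the
  requested mode, columns the held mode:

             IS   IX   S    X    AI
      IS     +    +    +    -    +
      IX     +    +    -    -    +
      S      +    -    +    -    -
      X      -    -    -    -    -
      AI     +    +    -    -    -

  Record locks use only the S and X rows; the intention modes IS and IX
  and AUTO_INC (AI) arise from table locks.]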
+@return TRUE if new lock has to wait for lock2 to be removed */ +UNIV_INLINE +bool +lock_rec_has_to_wait( +/*=================*/ + const trx_t* trx, /*!< in: trx of new lock */ + unsigned type_mode,/*!< in: precise mode of the new lock + to set: LOCK_S or LOCK_X, possibly + ORed to LOCK_GAP or LOCK_REC_NOT_GAP, + LOCK_INSERT_INTENTION */ + const lock_t* lock2, /*!< in: another record lock; NOTE that + it is assumed that this has a lock bit + set on the same record as in the new + lock we are setting */ + bool lock_is_on_supremum) + /*!< in: TRUE if we are setting the + lock on the 'supremum' record of an + index page: we know then that the lock + request is really for a 'gap' type lock */ +{ + ut_ad(trx); + ut_ad(!lock2->is_table()); + ut_d(lock_sys.hash_get(type_mode).assert_locked( + lock2->un_member.rec_lock.page_id)); + + if (trx == lock2->trx + || lock_mode_compatible( + static_cast<lock_mode>(LOCK_MODE_MASK & type_mode), + lock2->mode())) { + return false; + } + + /* We have somewhat complex rules when gap type record locks + cause waits */ + + if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) + && !(type_mode & LOCK_INSERT_INTENTION)) { + + /* Gap type locks without LOCK_INSERT_INTENTION flag + do not need to wait for anything. This is because + different users can have conflicting lock types + on gaps. */ + + return false; + } + + if (!(type_mode & LOCK_INSERT_INTENTION) && lock2->is_gap()) { + + /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP + does not need to wait for a gap type lock */ + + return false; + } + + if ((type_mode & LOCK_GAP) && lock2->is_record_not_gap()) { + + /* Lock on gap does not need to wait for + a LOCK_REC_NOT_GAP type lock */ + + return false; + } + + if (lock2->is_insert_intention()) { + /* No lock request needs to wait for an insert + intention lock to be removed. This is ok since our + rules allow conflicting locks on gaps. This eliminates + a spurious deadlock caused by a next-key lock waiting + for an insert intention lock; when the insert + intention lock was granted, the insert deadlocked on + the waiting next-key lock. + + Also, insert intention locks do not disturb each + other. */ + + return false; + } + +#ifdef HAVE_REPLICATION + if ((type_mode & LOCK_GAP || lock2->is_gap()) + && !thd_need_ordering_with(trx->mysql_thd, lock2->trx->mysql_thd)) { + /* If the upper server layer has already decided on the + commit order between the transaction requesting the + lock and the transaction owning the lock, we do not + need to wait for gap locks. Such ordeering by the upper + server layer happens in parallel replication, where the + commit order is fixed to match the original order on the + master. + + Such gap locks are mainly needed to get serialisability + between transactions so that they will be binlogged in + the correct order so that statement-based replication + will give the correct results. Since the right order + was already determined on the master, we do not need + to enforce it again here. + + Skipping the locks is not essential for correctness, + since in case of deadlock we will just kill the later + transaction and retry it. But it can save some + unnecessary rollbacks and retries. */ + + return false; + } +#endif /* HAVE_REPLICATION */ + +#ifdef WITH_WSREP + /* New lock request from a transaction is using unique key + scan and this transaction is a wsrep high priority transaction + (brute force). 
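  [Aside (editor's illustrative sketch, not part of this file): the
  gap-related early returns above condense to a small predicate. The
  flag names mirror the real type_mode bits; the same-transaction and
  mode-compatibility checks are assumed to have been done first:

      struct req { bool gap, rec_not_gap, insert_intention, on_supremum; };

      bool wait_is_waived(const req &incoming, const req &held)
      {
        if ((incoming.on_supremum || incoming.gap)
            && !incoming.insert_intention)
          return true;  // plain gap requests conflict with nothing
        if (!incoming.insert_intention && held.gap)
          return true;  // record requests ignore gap-only locks
        if (incoming.gap && held.rec_not_gap)
          return true;  // gap requests ignore record-only locks
        if (held.insert_intention)
          return true;  // nothing ever waits for an insert intention lock
        return false;   // otherwise the incompatible lock forces a wait
      }
  ]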
If conflicting transaction is also wsrep high + priority transaction we should avoid lock conflict because + ordering of these transactions is already decided and + conflicting transaction will be later replayed. */ + if (trx->is_wsrep_UK_scan() + && wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) { + return false; + } + + /* if BF-BF conflict, we have to look at write set order */ + if (trx->is_wsrep() && + (type_mode & LOCK_MODE_MASK) == LOCK_X && + (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X && + wsrep_thd_order_before(trx->mysql_thd, + lock2->trx->mysql_thd)) { + return false; + } + + /* We very well can let bf to wait normally as other + BF will be replayed in case of conflict. For debug + builds we will do additional sanity checks to catch + unsupported bf wait if any. */ + ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx)); +#endif /* WITH_WSREP */ + + return true; +} + +/*********************************************************************//** +Checks if a lock request lock1 has to wait for request lock2. +@return TRUE if lock1 has to wait for lock2 to be removed */ +bool +lock_has_to_wait( +/*=============*/ + const lock_t* lock1, /*!< in: waiting lock */ + const lock_t* lock2) /*!< in: another lock; NOTE that it is + assumed that this has a lock bit set + on the same record as in lock1 if the + locks are record locks */ +{ + ut_ad(lock1 && lock2); + + if (lock1->trx == lock2->trx + || lock_mode_compatible(lock1->mode(), lock2->mode())) { + return false; + } + + if (lock1->is_table()) { + return true; + } + + ut_ad(!lock2->is_table()); + + if (lock1->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) { + return lock_prdt_has_to_wait(lock1->trx, lock1->type_mode, + lock_get_prdt_from_lock(lock1), + lock2); + } + + return lock_rec_has_to_wait( + lock1->trx, lock1->type_mode, lock2, + lock_rec_get_nth_bit(lock1, PAGE_HEAP_NO_SUPREMUM)); +} + +/*============== RECORD LOCK BASIC FUNCTIONS ============================*/ + +/**********************************************************************//** +Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, +if none found. +@return bit index == heap number of the record, or ULINT_UNDEFINED if +none found */ +ulint +lock_rec_find_set_bit( +/*==================*/ + const lock_t* lock) /*!< in: record lock with at least one bit set */ +{ + for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) { + + if (lock_rec_get_nth_bit(lock, i)) { + + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +/*********************************************************************//** +Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock +pointer in the transaction! This function is used in lock object creation +and resetting. */ +static +void +lock_rec_bitmap_reset( +/*==================*/ + lock_t* lock) /*!< in: record lock */ +{ + ulint n_bytes; + + ut_ad(!lock->is_table()); + + /* Reset to zero the bitmap which resides immediately after the lock + struct */ + + n_bytes = lock_rec_get_n_bits(lock) / 8; + + ut_ad((lock_rec_get_n_bits(lock) % 8) == 0); + + memset(reinterpret_cast<void*>(&lock[1]), 0, n_bytes); +} + +/*********************************************************************//** +Copies a record lock to heap. 
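  [Aside (editor's illustrative sketch, not part of this file): the
  bitmap that lock_rec_find_set_bit() above scans is stored immediately
  after the lock_t object, one bit per heap number. The bit arithmetic,
  restated over a plain byte array:

      bool bit_is_set(const unsigned char *bitmap, unsigned heap_no)
      {
        return bitmap[heap_no / 8] >> (heap_no % 8) & 1;
      }

      void set_bit(unsigned char *bitmap, unsigned heap_no)
      {
        bitmap[heap_no / 8]|= (unsigned char) (1U << (heap_no % 8));
      }

  This layout is why the copy function below needs just sizeof(lock_t)
  plus n_bits / 8 bytes: the trailing bitmap is the only variable-size
  part of a record lock.]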
+@return copy of lock */ +static +lock_t* +lock_rec_copy( +/*==========*/ + const lock_t* lock, /*!< in: record lock */ + mem_heap_t* heap) /*!< in: memory heap */ +{ + ulint size; + + ut_ad(!lock->is_table()); + + size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8; + + return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size))); +} + +/*********************************************************************//** +Gets the previous record lock set on a record. +@return previous lock on the same record, NULL if none exists */ +const lock_t* +lock_rec_get_prev( +/*==============*/ + const lock_t* in_lock,/*!< in: record lock */ + ulint heap_no)/*!< in: heap number of the record */ +{ + ut_ad(!in_lock->is_table()); + const page_id_t id{in_lock->un_member.rec_lock.page_id}; + hash_cell_t *cell= lock_sys.hash_get(in_lock->type_mode).cell_get(id.fold()); + + for (lock_t *lock= lock_sys_t::get_first(*cell, id); lock != in_lock; + lock= lock_rec_get_next_on_page(lock)) + if (lock_rec_get_nth_bit(lock, heap_no)) + return lock; + + return nullptr; +} + +/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ + +/*********************************************************************//** +Checks if a transaction has a GRANTED explicit lock on rec stronger or equal +to precise_mode. +@return lock or NULL */ +UNIV_INLINE +lock_t* +lock_rec_has_expl( +/*==============*/ + ulint precise_mode,/*!< in: LOCK_S or LOCK_X + possibly ORed to LOCK_GAP or + LOCK_REC_NOT_GAP, for a + supremum record we regard this + always a gap type request */ + const hash_cell_t& cell, /*!< in: lock hash table cell */ + const page_id_t id, /*!< in: page identifier */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: transaction */ +{ + ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S + || (precise_mode & LOCK_MODE_MASK) == LOCK_X); + ut_ad(!(precise_mode & LOCK_INSERT_INTENTION)); + + for (lock_t *lock= lock_sys_t::get_first(cell, id, heap_no); lock; + lock= lock_rec_get_next(heap_no, lock)) + if (lock->trx == trx && + !(lock->type_mode & (LOCK_WAIT | LOCK_INSERT_INTENTION)) && + (!((LOCK_REC_NOT_GAP | LOCK_GAP) & lock->type_mode) || + heap_no == PAGE_HEAP_NO_SUPREMUM || + ((LOCK_REC_NOT_GAP | LOCK_GAP) & precise_mode & lock->type_mode)) && + lock_mode_stronger_or_eq(lock->mode(), static_cast<lock_mode> + (precise_mode & LOCK_MODE_MASK))) + return lock; + + return nullptr; +} + +#ifdef UNIV_DEBUG +/*********************************************************************//** +Checks if some other transaction has a lock request in the queue. 
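  [Aside (editor's note, not part of this file): lock_rec_has_expl()
  above accepts any granted lock whose mode is stronger than or equal to
  the requested one, per lock_mode_stronger_or_eq() and the strength
  matrix documented in lock0priv.h (row covers column):

             IS   IX   S    X    AI
      IS     +    -    -    -    -
      IX     +    +    -    -    -
      S      +    -    +    -    -
      X      +    +    +    +    +
      AI     -    -    -    -    +

  For record locks only the S and X rows matter: X covers S, and every
  mode covers itself.]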
+@return lock or NULL */ +static +lock_t* +lock_rec_other_has_expl_req( +/*========================*/ + lock_mode mode, /*!< in: LOCK_S or LOCK_X */ + const hash_cell_t& cell, /*!< in: lock hash table cell */ + const page_id_t id, /*!< in: page identifier */ + bool wait, /*!< in: whether also waiting locks + are taken into account */ + ulint heap_no,/*!< in: heap number of the record */ + const trx_t* trx) /*!< in: transaction, or NULL if + requests by all transactions + are taken into account */ +{ + ut_ad(mode == LOCK_X || mode == LOCK_S); + + /* Only GAP lock can be on SUPREMUM, and we are not looking for + GAP lock */ + if (heap_no == PAGE_HEAP_NO_SUPREMUM) { + return(NULL); + } + + for (lock_t* lock = lock_sys_t::get_first(cell, id, heap_no); + lock; lock = lock_rec_get_next(heap_no, lock)) { + if (lock->trx != trx + && !lock->is_gap() + && (!lock->is_waiting() || wait) + && lock_mode_stronger_or_eq(lock->mode(), mode)) { + + return(lock); + } + } + + return(NULL); +} +#endif /* UNIV_DEBUG */ + +#ifdef WITH_WSREP +void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id); + +#ifdef UNIV_DEBUG +void wsrep_report_error(const lock_t* victim_lock, const trx_t *bf_trx) +{ + // We have conflicting BF-BF case, these threads + // should not execute concurrently + mtr_t mtr; + WSREP_ERROR("BF request is not compatible with victim"); + WSREP_ERROR("BF requesting lock: "); + lock_rec_print(stderr, bf_trx->lock.wait_lock, mtr); + WSREP_ERROR("victim holding lock: "); + lock_rec_print(stderr, victim_lock, mtr); + wsrep_assert_no_bf_bf_wait(victim_lock, bf_trx); +} +#endif /* WITH_DEBUG */ + +/** Kill the holders of conflicting locks. +@param trx brute-force applier transaction running in the current thread */ +ATTRIBUTE_COLD ATTRIBUTE_NOINLINE +static void lock_wait_wsrep(trx_t *trx) +{ + DBUG_ASSERT(wsrep_on(trx->mysql_thd)); + if (!wsrep_thd_is_BF(trx->mysql_thd, false)) + return; + + std::set<trx_t*> victims; + + lock_sys.wr_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&lock_sys.wait_mutex); + + const lock_t *wait_lock= trx->lock.wait_lock; + if (!wait_lock) + { +func_exit: + lock_sys.wr_unlock(); + mysql_mutex_unlock(&lock_sys.wait_mutex); + return; + } + + if (wait_lock->is_table()) + { + dict_table_t *table= wait_lock->un_member.tab_lock.table; + for (lock_t *lock= UT_LIST_GET_FIRST(table->locks); lock; + lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) + { + /* if victim has also BF status, but has earlier seqno, we have to wait */ + if (lock->trx != trx && + !(wsrep_thd_is_BF(lock->trx->mysql_thd, false) && + wsrep_thd_order_before(lock->trx->mysql_thd, trx->mysql_thd))) + { + if (wsrep_thd_is_BF(lock->trx->mysql_thd, false)) + { + // There is no need to kill victim with compatible lock + if (!lock_has_to_wait(trx->lock.wait_lock, lock)) + continue; + +#ifdef UNIV_DEBUG + wsrep_report_error(lock, trx); +#endif + } + + victims.emplace(lock->trx); + } + } + } + else + { + const page_id_t id{wait_lock->un_member.rec_lock.page_id}; + hash_cell_t &cell= *(wait_lock->type_mode & LOCK_PREDICATE + ? 
lock_sys.prdt_hash : lock_sys.rec_hash).cell_get + (id.fold()); + if (lock_t *lock= lock_sys_t::get_first(cell, id)) + { + const ulint heap_no= lock_rec_find_set_bit(wait_lock); + if (!lock_rec_get_nth_bit(lock, heap_no)) + lock= lock_rec_get_next(heap_no, lock); + do + { + /* if victim has also BF status, but has earlier seqno, we have to wait */ + if (lock->trx != trx && + !(wsrep_thd_is_BF(lock->trx->mysql_thd, false) && + wsrep_thd_order_before(lock->trx->mysql_thd, trx->mysql_thd))) + { + if (wsrep_thd_is_BF(lock->trx->mysql_thd, false)) + { + // There is no need to kill victim with compatible lock + if (!lock_has_to_wait(trx->lock.wait_lock, lock)) + continue; + +#ifdef UNIV_DEBUG + wsrep_report_error(lock, trx); +#endif + } + + victims.emplace(lock->trx); + } + } while ((lock= lock_rec_get_next(heap_no, lock))); + } + } + + if (victims.empty()) + goto func_exit; + + std::vector<std::pair<ulong,trx_id_t>> victim_id; + for (trx_t *v : victims) + victim_id.emplace_back(std::pair<ulong,trx_id_t> + {thd_get_thread_id(v->mysql_thd), v->id}); + + DBUG_EXECUTE_IF("sync.before_wsrep_thd_abort", + { + const char act[]= + "now SIGNAL sync.before_wsrep_thd_abort_reached " + "WAIT_FOR signal.before_wsrep_thd_abort"; + DBUG_ASSERT(!debug_sync_set_action(trx->mysql_thd, + STRING_WITH_LEN(act))); + };); + + lock_sys.wr_unlock(); + mysql_mutex_unlock(&lock_sys.wait_mutex); + + for (const auto &v : victim_id) + lock_wait_wsrep_kill(trx, v.first, v.second); +} +#endif /* WITH_WSREP */ + +/*********************************************************************//** +Checks if some other transaction has a conflicting explicit lock request +in the queue, so that we have to wait. +@param[in] mode LOCK_S or LOCK_X, possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP, +LOCK_INSERT_INTENTION +@param[in] cell lock hash table cell +@param[in] id page identifier +@param[in] heap_no heap number of the record +@param[in] trx our transaction +@return conflicting lock and the flag which indicated if conflicting locks +which wait for the current transaction were ignored */ +static lock_t *lock_rec_other_has_conflicting(unsigned mode, + const hash_cell_t &cell, + const page_id_t id, + ulint heap_no, const trx_t *trx) +{ + bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM); + + for (lock_t* lock = lock_sys_t::get_first(cell, id, heap_no); + lock; lock = lock_rec_get_next(heap_no, lock)) { + if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) { + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Checks if some transaction has an implicit x-lock on a record in a secondary +index. +@return transaction id of the transaction which has the x-lock, or 0; +NOTE that this function can return false positives but never false +negatives. The caller must confirm all positive results by calling +trx_is_active(). 
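  [Aside (editor's illustrative sketch, not part of this file): the probe
  below is a cheap filter followed by an expensive confirmation, because
  secondary index pages carry only PAGE_MAX_TRX_ID, not per-record
  transaction ids. Shape of the logic, with illustrative stand-ins for
  trx_sys.find_same_or_older() and row_vers_impl_x_locked():

      struct trx;
      bool any_active_trx_at_least(uint64_t trx_id);
      trx *clustered_index_impl_lock_holder(uint64_t page_max_trx_id);

      trx *implicit_x_lock_holder(uint64_t page_max_trx_id)
      {
        if (!any_active_trx_at_least(page_max_trx_id))
          return nullptr;  // every change on the page is committed
        // Only the clustered index record can tell whether the
        // modifying transaction is still active.
        return clustered_index_impl_lock_holder(page_max_trx_id);
      }
  ]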
*/ +static +trx_t* +lock_sec_rec_some_has_impl( +/*=======================*/ + trx_t* caller_trx,/*!<in/out: trx of current thread */ + const rec_t* rec, /*!< in: user record */ + dict_index_t* index, /*!< in: secondary index */ + const rec_offs* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + lock_sys.assert_unlocked(); + ut_ad(!dict_index_is_clust(index)); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!rec_is_metadata(rec, *index)); + + const trx_id_t max_trx_id= page_get_max_trx_id(page_align(rec)); + + /* Note: It is possible to have caller_trx->id == 0 in a locking read + if caller_trx has not modified any persistent tables. */ + if (!trx_sys.find_same_or_older(caller_trx, max_trx_id) || + !lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) + return nullptr; + + /* We checked above that some active (or XA PREPARE) transaction exists + that is older than PAGE_MAX_TRX_ID. That is, some transaction may be + holding an implicit lock on the record. We have to look up the + clustered index record to find if it is (or was) the case. */ + return row_vers_impl_x_locked(caller_trx, rec, index, offsets); +} + +/*********************************************************************//** +Return the number of table locks for a transaction. +The caller must be holding lock_sys.latch. */ +ulint +lock_number_of_tables_locked( +/*=========================*/ + const trx_lock_t* trx_lock) /*!< in: transaction locks */ +{ + const lock_t* lock; + ulint n_tables = 0; + + lock_sys.assert_locked(); + + for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + + if (lock->is_table()) { + n_tables++; + } + } + + return(n_tables); +} + +/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/ + +/** Reset the wait status of a lock. +@param[in,out] lock lock that was possibly being waited for */ +static void lock_reset_lock_and_trx_wait(lock_t *lock) +{ + lock_sys.assert_locked(*lock); + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + trx_t *trx= lock->trx; + ut_ad(lock->is_waiting()); + ut_ad(!trx->lock.wait_lock || trx->lock.wait_lock == lock); + if (trx_t *wait_trx= trx->lock.wait_trx) + Deadlock::to_check.erase(wait_trx); + trx->lock.wait_lock= nullptr; + trx->lock.wait_trx= nullptr; + lock->type_mode&= ~LOCK_WAIT; +} + +#ifdef UNIV_DEBUG +/** Check transaction state */ +static void check_trx_state(const trx_t *trx) +{ + ut_ad(!trx->auto_commit || trx->will_lock); + const auto state= trx->state; + ut_ad(state == TRX_STATE_ACTIVE || + state == TRX_STATE_PREPARED_RECOVERED || + state == TRX_STATE_PREPARED || + state == TRX_STATE_COMMITTED_IN_MEMORY); +} +#endif + +/** Create a new record lock and inserts it to the lock queue, +without checking for deadlocks or conflicts. 
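  [Aside (editor's note, not part of this file): the bitmap appended to
  the new lock_t is sized as ceil(n_heap / 8) bytes, computed below as
  (page_dir_get_n_heap(page) + 7) / 8. For example, a page whose heap
  holds 100 entries gets (100 + 7) / 8 = 13 bytes, covering heap numbers
  0..103. Predicate locks always use one byte, since they only ever set
  bit 0 (PRDT_HEAPNO).]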
+@param[in] c_lock conflicting lock +@param[in] type_mode lock mode and wait flag +@param[in] page_id index page number +@param[in] page R-tree index page, or NULL +@param[in] heap_no record heap number in the index page +@param[in] index the index tree +@param[in,out] trx transaction +@param[in] holds_trx_mutex whether the caller holds trx->mutex +@return created lock */ +lock_t* +lock_rec_create_low( + lock_t* c_lock, + unsigned type_mode, + const page_id_t page_id, + const page_t* page, + ulint heap_no, + dict_index_t* index, + trx_t* trx, + bool holds_trx_mutex) +{ + lock_t* lock; + ulint n_bytes; + + ut_d(lock_sys.hash_get(type_mode).assert_locked(page_id)); + ut_ad(xtest() || holds_trx_mutex == trx->mutex_is_owner()); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + ut_ad(!(type_mode & LOCK_TABLE)); + ut_ad(trx->state != TRX_STATE_NOT_STARTED); + ut_ad(!trx->is_autocommit_non_locking()); + + /* If rec is the supremum record, then we reset the gap and + LOCK_REC_NOT_GAP bits, as all locks on the supremum are + automatically of the gap type */ + + if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { + ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); + type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); + } + + if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) { + n_bytes = (page_dir_get_n_heap(page) + 7) / 8; + } else { + ut_ad(heap_no == PRDT_HEAPNO); + + /* The lock is always on PAGE_HEAP_NO_INFIMUM (0), so + we only need 1 bit (which round up to 1 byte) for + lock bit setting */ + n_bytes = 1; + + if (type_mode & LOCK_PREDICATE) { + ulint tmp = UNIV_WORD_SIZE - 1; + + /* We will attach predicate structure after lock. + Make sure the memory is aligned on 8 bytes, + the mem_heap_alloc will align it with + MEM_SPACE_NEEDED anyway. 
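  [Aside (editor's worked example, not part of this file): on a 64-bit
  build, UNIV_WORD_SIZE == 8 gives tmp == 7; assuming
  sizeof(lock_prdt_t) == 16, the rounding below turns n_bytes == 1 into
  (1 + 16 + 7) & ~7 == 24 == sizeof(lock_prdt_t) + UNIV_WORD_SIZE,
  which is exactly what the assertion that follows checks.]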
*/ + n_bytes = (n_bytes + sizeof(lock_prdt_t) + tmp) & ~tmp; + ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE); + } + } + + if (!holds_trx_mutex) { + trx->mutex_lock(); + } + ut_ad(trx->mutex_is_owner()); + ut_ad(trx->state != TRX_STATE_NOT_STARTED); + + if (trx->lock.rec_cached >= UT_ARR_SIZE(trx->lock.rec_pool) + || sizeof *lock + n_bytes > sizeof *trx->lock.rec_pool) { + lock = static_cast<lock_t*>( + mem_heap_alloc(trx->lock.lock_heap, + sizeof *lock + n_bytes)); + } else { + lock = &trx->lock.rec_pool[trx->lock.rec_cached++].lock; + } + + lock->trx = trx; + lock->type_mode = type_mode; + lock->index = index; + lock->un_member.rec_lock.page_id = page_id; + + if (UNIV_LIKELY(!(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)))) { + lock->un_member.rec_lock.n_bits = uint32_t(n_bytes * 8); + } else { + /* Predicate lock always on INFIMUM (0) */ + lock->un_member.rec_lock.n_bits = 8; + } + lock_rec_bitmap_reset(lock); + lock_rec_set_nth_bit(lock, heap_no); + index->table->n_rec_locks++; + ut_ad(index->table->get_ref_count() || !index->table->can_be_evicted); + + const auto lock_hash = &lock_sys.hash_get(type_mode); + lock_hash->cell_get(page_id.fold())->append(*lock, &lock_t::hash); + + if (type_mode & LOCK_WAIT) { + if (trx->lock.wait_trx) { + ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx); + ut_ad(trx->lock.wait_lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } else { + ut_ad(c_lock); + trx->lock.wait_trx = c_lock->trx; + ut_ad(!trx->lock.wait_lock); + } + trx->lock.wait_lock = lock; + } + UT_LIST_ADD_LAST(trx->lock.trx_locks, lock); + if (!holds_trx_mutex) { + trx->mutex_unlock(); + } + MONITOR_INC(MONITOR_RECLOCK_CREATED); + MONITOR_INC(MONITOR_NUM_RECLOCK); + + return lock; +} + +/** Enqueue a waiting request for a lock which cannot be granted immediately. +Check for deadlocks. +@param[in] type_mode the requested lock mode (LOCK_S or LOCK_X) + possibly ORed with LOCK_GAP or + LOCK_REC_NOT_GAP, ORed with + LOCK_INSERT_INTENTION if this + waiting lock request is set + when performing an insert of + an index record +@param[in] id page identifier +@param[in] page leaf page in the index +@param[in] heap_no record heap number in the block +@param[in] index index tree +@param[in,out] thr query thread +@param[in] prdt minimum bounding box (spatial index) +@retval DB_LOCK_WAIT if the waiting lock was enqueued +@retval DB_DEADLOCK if this transaction was chosen as the victim */ +dberr_t +lock_rec_enqueue_waiting( + lock_t* c_lock, + unsigned type_mode, + const page_id_t id, + const page_t* page, + ulint heap_no, + dict_index_t* index, + que_thr_t* thr, + lock_prdt_t* prdt) +{ + ut_d(lock_sys.hash_get(type_mode).assert_locked(id)); + ut_ad(!srv_read_only_mode); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + + trx_t* trx = thr_get_trx(thr); + ut_ad(xtest() || trx->mutex_is_owner()); + ut_ad(!trx->dict_operation_lock_mode); + /* Apart from Galera, only transactions that have waiting lock can be + chosen as deadlock victim. Only one lock can be waited for at a time, + and a transaction is associated with a single thread. That is why there + must not be waiting lock requests if the transaction is deadlock victim + and it is not WSREP. Galera transaction abort can be invoked from MDL + acquisition code when the transaction does not have waiting record + lock, that's why we check only deadlock victim bit here. 
*/ + ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1)); + + if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) { + trx->error_state = DB_LOCK_WAIT_TIMEOUT; + return DB_LOCK_WAIT_TIMEOUT; + } + + /* Enqueue the lock request that will wait to be granted, note that + we already own the trx mutex. */ + lock_t* lock = lock_rec_create_low( + c_lock, + type_mode | LOCK_WAIT, id, page, heap_no, index, trx, true); + + if (prdt && type_mode & LOCK_PREDICATE) { + lock_prdt_set_prdt(lock, prdt); + } + + trx->lock.wait_thr = thr; + + DBUG_LOG("ib_lock", "trx " << ib::hex(trx->id) + << " waits for lock in index " << index->name + << " of table " << index->table->name); + + MONITOR_INC(MONITOR_LOCKREC_WAIT); + + return DB_LOCK_WAIT; +} + +/*********************************************************************//** +Looks for a suitable type record lock struct by the same trx on the same page. +This can be used to save space when a new record lock should be set on a page: +no new struct is needed, if a suitable old is found. +@return lock or NULL */ +static inline +lock_t* +lock_rec_find_similar_on_page( + ulint type_mode, /*!< in: lock type_mode field */ + ulint heap_no, /*!< in: heap number of the record */ + lock_t* lock, /*!< in: lock_sys.get_first() */ + const trx_t* trx) /*!< in: transaction */ +{ + lock_sys.rec_hash.assert_locked(lock->un_member.rec_lock.page_id); + + for (/* No op */; + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { + + if (lock->trx == trx + && lock->type_mode == type_mode + && lock_rec_get_n_bits(lock) > heap_no) { + + return(lock); + } + } + + return(NULL); +} + +/*********************************************************************//** +Adds a record lock request in the record queue. The request is normally +added as the last in the queue, but if there are no waiting lock requests +on the record, and the request to be added is not a waiting request, we +can reuse a suitable record lock object already existing on the same page, +just setting the appropriate bit in its bitmap. This is a low-level function +which does NOT check for deadlocks or lock compatibility! +@param[in] type_mode lock mode, wait, gap etc. flags +@param[in,out] cell first hash table cell +@param[in] id page identifier +@param[in] page buffer block containing the record +@param[in] heap_no heap number of the record +@param[in] index index of record +@param[in,out] trx transaction +@param[in] caller_owns_trx_mutex TRUE if caller owns the transaction mutex */ +TRANSACTIONAL_TARGET +static void lock_rec_add_to_queue(unsigned type_mode, const hash_cell_t &cell, + const page_id_t id, const page_t *page, + ulint heap_no, dict_index_t *index, + trx_t *trx, bool caller_owns_trx_mutex) +{ + ut_d(lock_sys.hash_get(type_mode).assert_locked(id)); + ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner()); + ut_ad(index->is_primary() + || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION); + ut_ad(!(type_mode & LOCK_TABLE)); +#ifdef UNIV_DEBUG + switch (type_mode & LOCK_MODE_MASK) { + case LOCK_X: + case LOCK_S: + break; + default: + ut_error; + } + + if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) { + lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S + ? LOCK_X + : LOCK_S; + const lock_t* other_lock + = lock_rec_other_has_expl_req( + mode, cell, id, false, heap_no, trx); +#ifdef WITH_WSREP + if (UNIV_LIKELY_NULL(other_lock) && trx->is_wsrep()) { + /* Only BF transaction may be granted lock + before other conflicting lock request. 
*/ + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) + && !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { + /* If it is not BF, this case is a bug. */ + wsrep_report_bf_lock_wait(trx->mysql_thd, trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + ut_error; + } + } else +#endif /* WITH_WSREP */ + ut_ad(!other_lock); + } +#endif /* UNIV_DEBUG */ + + /* If rec is the supremum record, then we can reset the gap bit, as + all locks on the supremum are automatically of the gap type, and we + try to avoid unnecessary memory consumption of a new record lock + struct for a gap type lock */ + + if (heap_no == PAGE_HEAP_NO_SUPREMUM) { + ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); + + /* There should never be LOCK_REC_NOT_GAP on a supremum + record, but let us play safe */ + + type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP); + } + + if (type_mode & LOCK_WAIT) { + goto create; + } else if (lock_t *first_lock = lock_sys_t::get_first(cell, id)) { + for (lock_t* lock = first_lock;;) { + if (lock->is_waiting() + && lock_rec_get_nth_bit(lock, heap_no)) { + goto create; + } + if (!(lock = lock_rec_get_next_on_page(lock))) { + break; + } + } + + /* Look for a similar record lock on the same page: + if one is found and there are no waiting lock requests, + we can just set the bit */ + if (lock_t* lock = lock_rec_find_similar_on_page( + type_mode, heap_no, first_lock, trx)) { + trx_t* lock_trx = lock->trx; + if (caller_owns_trx_mutex) { + trx->mutex_unlock(); + } + { + TMTrxGuard tg{*lock_trx}; + lock_rec_set_nth_bit(lock, heap_no); + } + + if (caller_owns_trx_mutex) { + trx->mutex_lock(); + } + return; + } + } + +create: + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock_rec_create_low(nullptr, + type_mode, id, page, heap_no, index, trx, + caller_owns_trx_mutex); +} + +/** A helper function for lock_rec_lock_slow(), which grants a Next Key Lock +(either LOCK_X or LOCK_S as specified by `mode`) on <`block`,`heap_no`> in the +`index` to the `trx`, assuming that it already has a granted `held_lock`, which +is at least as strong as mode|LOCK_REC_NOT_GAP. It does so by either reusing the +lock if it already covers the gap, or by ensuring a separate GAP Lock, which in +combination with Record Lock satisfies the request. +@param[in] held_lock a lock granted to `trx` which is at least as strong + as mode|LOCK_REC_NOT_GAP +@param[in] mode requested lock mode: LOCK_X or LOCK_S +@param[in] cell lock hash table cell +@param[in] id page identifier +@param[in] page buffer block containing the record +@param[in] heap_no heap number of the record to be locked +@param[in] index index of record to be locked +@param[in] trx the transaction requesting the Next Key Lock */ +static void lock_reuse_for_next_key_lock(const lock_t *held_lock, + unsigned mode, + const hash_cell_t &cell, + const page_id_t id, + const page_t *page, ulint heap_no, + dict_index_t *index, trx_t *trx) +{ + ut_ad(trx->mutex_is_owner()); + ut_ad(mode == LOCK_S || mode == LOCK_X); + ut_ad(lock_mode_is_next_key_lock(mode)); + + if (!held_lock->is_record_not_gap()) + { + ut_ad(held_lock->is_next_key_lock()); + return; + } + + /* We have a Record Lock granted, so we only need a GAP Lock. We assume + that GAP Locks do not conflict with anything. 
Therefore a GAP Lock + could be granted to us right now if we've requested: */ + mode|= LOCK_GAP; + ut_ad(nullptr == + lock_rec_other_has_conflicting(mode, cell, id, heap_no, trx)); + + /* It might be the case we already have one, so we first check that. */ + if (lock_rec_has_expl(mode, cell, id, heap_no, trx) == nullptr) + lock_rec_add_to_queue(mode, cell, id, page, heap_no, index, trx, true); +} + + +/*********************************************************************//** +Tries to lock the specified record in the mode requested. If not immediately +possible, enqueues a waiting lock request. This is a low-level function +which does NOT look at implicit locks! Checks lock compatibility within +explicit locks. This function sets a normal next-key lock, or in the case +of a page supremum record, a gap type lock. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */ +static +dberr_t +lock_rec_lock( +/*==========*/ + bool impl, /*!< in: if true, no lock is set + if no wait is necessary: we + assume that the caller will + set an implicit lock */ + unsigned mode, /*!< in: lock mode: LOCK_X or + LOCK_S possibly ORed to either + LOCK_GAP or LOCK_REC_NOT_GAP */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + ulint heap_no,/*!< in: heap number of record */ + dict_index_t* index, /*!< in: index of record */ + que_thr_t* thr) /*!< in: query thread */ +{ + trx_t *trx= thr_get_trx(thr); + /* There must not be lock requests for reads or updates if transaction was + chosen as deadlock victim. Apart from Galera, only transactions that have + waiting lock may be chosen as deadlock victims. Only one lock can be waited + for at a time, and a transaction is associated with a single thread. Galera + transaction abort can be invoked from MDL acquisition code when the + transaction does not have waiting lock, that's why we check only deadlock + victim bit here. */ + ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1)); + ut_ad(!srv_read_only_mode); + ut_ad(((LOCK_MODE_MASK | LOCK_TABLE) & mode) == LOCK_S || + ((LOCK_MODE_MASK | LOCK_TABLE) & mode) == LOCK_X); + ut_ad(~mode & (LOCK_GAP | LOCK_REC_NOT_GAP)); + ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); + DBUG_EXECUTE_IF("innodb_report_deadlock", return DB_DEADLOCK;); +#ifdef ENABLED_DEBUG_SYNC + if (trx->mysql_thd) + DEBUG_SYNC_C("lock_rec"); +#endif + + ut_ad((LOCK_MODE_MASK & mode) != LOCK_S || + lock_table_has(trx, index->table, LOCK_IS)); + ut_ad((LOCK_MODE_MASK & mode) != LOCK_X || + lock_table_has(trx, index->table, LOCK_IX)); + + if (lock_table_has(trx, index->table, + static_cast<lock_mode>(LOCK_MODE_MASK & mode))) + return DB_SUCCESS; + + /* During CREATE TABLE, we will write to newly created FTS_*_CONFIG + on which no lock has been created yet. */ + ut_ad(!trx->dict_operation_lock_mode || + (strstr(index->table->name.m_name, "/FTS_") && + strstr(index->table->name.m_name, "_CONFIG") + sizeof("_CONFIG") == + index->table->name.m_name + strlen(index->table->name.m_name) + 1)); + MONITOR_ATOMIC_INC(MONITOR_NUM_RECLOCK_REQ); + const page_id_t id{block->page.id()}; + LockGuard g{lock_sys.rec_hash, id}; + + if (lock_t *lock= lock_sys_t::get_first(g.cell(), id)) + { + dberr_t err= DB_SUCCESS; + trx->mutex_lock(); + if (lock_rec_get_next_on_page(lock) || + lock->trx != trx || + lock->type_mode != mode || + lock_rec_get_n_bits(lock) <= heap_no) + { + + unsigned checked_mode= (heap_no != PAGE_HEAP_NO_SUPREMUM && + lock_mode_is_next_key_lock(mode)) + ? 
mode | LOCK_REC_NOT_GAP + : mode; + + const lock_t *held_lock= + lock_rec_has_expl(checked_mode, g.cell(), id, heap_no, trx); + + /* Do nothing if the trx already has a strong enough lock on rec */ + if (!held_lock) + { + if (lock_t *c_lock= lock_rec_other_has_conflicting(mode, g.cell(), id, + heap_no, trx)) + /* + If another transaction has a non-gap conflicting + request in the queue, as this transaction does not + have a lock strong enough already granted on the + record, we have to wait. + */ + err= lock_rec_enqueue_waiting(c_lock, mode, id, block->page.frame, + heap_no, index, thr, nullptr); + else if (!impl) + { + /* Set the requested lock on the record. */ + lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, heap_no, + index, trx, true); + err= DB_SUCCESS_LOCKED_REC; + } + } + /* If checked_mode == mode, trx already has a strong enough lock on rec */ + else if (checked_mode != mode) + { + /* As check_mode != mode, the mode is Next Key Lock, which can not be + emulated by implicit lock (which are LOCK_REC_NOT_GAP only). */ + ut_ad(!impl); + + lock_reuse_for_next_key_lock(held_lock, mode, g.cell(), id, + block->page.frame, heap_no, index, trx); + } + } + else if (!impl) + { + /* + If the nth bit of the record lock is already set then we do not set + a new lock bit, otherwise we do set + */ + if (!lock_rec_get_nth_bit(lock, heap_no)) + { + lock_rec_set_nth_bit(lock, heap_no); + err= DB_SUCCESS_LOCKED_REC; + } + } + trx->mutex_unlock(); + return err; + } + + /* Simplified and faster path for the most common cases */ + if (!impl) + lock_rec_create_low(nullptr, mode, id, block->page.frame, heap_no, index, + trx, false); + + return DB_SUCCESS_LOCKED_REC; +} + +/*********************************************************************//** +Checks if a waiting record lock request still has to wait in a queue. +@return lock that is causing the wait */ +static +const lock_t* +lock_rec_has_to_wait_in_queue(const hash_cell_t &cell, const lock_t *wait_lock) +{ + const lock_t* lock; + ulint heap_no; + ulint bit_mask; + ulint bit_offset; + + ut_ad(wait_lock->is_waiting()); + ut_ad(!wait_lock->is_table()); + + heap_no = lock_rec_find_set_bit(wait_lock); + + bit_offset = heap_no / 8; + bit_mask = static_cast<ulint>(1) << (heap_no % 8); + + for (lock = lock_sys_t::get_first( + cell, wait_lock->un_member.rec_lock.page_id); + lock != wait_lock; + lock = lock_rec_get_next_on_page_const(lock)) { + const byte* p = (const byte*) &lock[1]; + + if (heap_no < lock_rec_get_n_bits(lock) + && (p[bit_offset] & bit_mask) + && lock_has_to_wait(wait_lock, lock)) { +#ifdef WITH_WSREP + if (lock->trx->is_wsrep() && + wsrep_thd_order_before(wait_lock->trx->mysql_thd, + lock->trx->mysql_thd)) { + /* don't wait for another BF lock */ + continue; + } +#endif + return(lock); + } + } + + return(NULL); +} + +/** Note that a record lock wait started */ +inline void lock_sys_t::wait_start() +{ + mysql_mutex_assert_owner(&wait_mutex); + wait_count+= WAIT_COUNT_STEP + 1; + /* The maximum number of concurrently waiting transactions is one less + than the maximum number of concurrent transactions. 
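  [Aside (editor's note, not part of this file): WAIT_COUNT_STEP equals
  UNIV_PAGE_SIZE_MAX / 16 * TRX_SYS_N_RSEGS = 65536 / 16 * 128 = 524,288,
  the same transaction bound quoted for Deadlock::find_cycle() above.
  Adding WAIT_COUNT_STEP + 1 per wait packs two counters into one word:
  the low bits hold the number of currently pending waits (decremented
  in wait_resume() below), while the higher bits accumulate the
  cumulative wait count.]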
+  */
+  static_assert(WAIT_COUNT_STEP == UNIV_PAGE_SIZE_MAX / 16 * TRX_SYS_N_RSEGS,
+                "compatibility");
+}
+
+/** Note that a record lock wait resumed */
+inline
+void lock_sys_t::wait_resume(THD *thd, my_hrtime_t start, my_hrtime_t now)
+{
+  mysql_mutex_assert_owner(&wait_mutex);
+  ut_ad(get_wait_pending());
+  ut_ad(get_wait_cumulative());
+  wait_count--;
+  if (now.val >= start.val)
+  {
+    const uint64_t diff_time=
+      static_cast<uint64_t>((now.val - start.val) / 1000);
+    wait_time+= diff_time;
+
+    if (diff_time > wait_time_max)
+      wait_time_max= diff_time;
+
+    thd_storage_lock_wait(thd, diff_time);
+  }
+}
+
+#ifdef HAVE_REPLICATION
+ATTRIBUTE_NOINLINE MY_ATTRIBUTE((nonnull, warn_unused_result))
+/** Report lock waits to parallel replication. Sets
+trx->error_state= DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was
+set when lock_sys.wait_mutex was unlocked.
+@param trx transaction that may be waiting for a lock
+@return lock being waited for (may have been replaced by an equivalent one)
+@retval nullptr if no lock is being waited for */
+static lock_t *lock_wait_rpl_report(trx_t *trx)
+{
+  mysql_mutex_assert_owner(&lock_sys.wait_mutex);
+  ut_ad(trx->state == TRX_STATE_ACTIVE);
+  THD *const thd= trx->mysql_thd;
+  ut_ad(thd);
+  lock_t *wait_lock= trx->lock.wait_lock;
+  if (!wait_lock)
+    return nullptr;
+  /* This would likely be too large to attempt to use a memory transaction,
+  even for wait_lock->is_table(). */
+  const bool nowait= lock_sys.wr_lock_try();
+  if (!nowait)
+  {
+    mysql_mutex_unlock(&lock_sys.wait_mutex);
+    lock_sys.wr_lock(SRW_LOCK_CALL);
+    mysql_mutex_lock(&lock_sys.wait_mutex);
+    wait_lock= trx->lock.wait_lock;
+    if (!wait_lock)
+    {
+func_exit:
+      lock_sys.wr_unlock();
+      /* trx->lock.was_chosen_as_deadlock_victim can be set when
+      lock_sys.wait_mutex was unlocked, let's check it. */
+      if (!nowait && trx->lock.was_chosen_as_deadlock_victim)
+        trx->error_state= DB_DEADLOCK;
+      return wait_lock;
+    }
+    ut_ad(wait_lock->is_waiting());
+  }
+  else if (!wait_lock->is_waiting())
+  {
+    wait_lock= trx->lock.wait_lock;
+    if (!wait_lock)
+      goto func_exit;
+    if (!wait_lock->is_waiting())
+    {
+      wait_lock= nullptr;
+      goto func_exit;
+    }
+  }
+
+  if (wait_lock->is_table())
+  {
+    dict_table_t *table= wait_lock->un_member.tab_lock.table;
+    for (lock_t *lock= UT_LIST_GET_FIRST(table->locks); lock;
+         lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock))
+      if (lock->trx != trx)
+        thd_rpl_deadlock_check(thd, lock->trx->mysql_thd);
+  }
+  else
+  {
+    const page_id_t id{wait_lock->un_member.rec_lock.page_id};
+    hash_cell_t &cell= *(wait_lock->type_mode & LOCK_PREDICATE
+                         ? lock_sys.prdt_hash : lock_sys.rec_hash).cell_get
+      (id.fold());
+    if (lock_t *lock= lock_sys_t::get_first(cell, id))
+    {
+      const ulint heap_no= lock_rec_find_set_bit(wait_lock);
+      if (!lock_rec_get_nth_bit(lock, heap_no))
+        lock= lock_rec_get_next(heap_no, lock);
+      do
+        if (lock->trx->mysql_thd != thd)
+          thd_rpl_deadlock_check(thd, lock->trx->mysql_thd);
+      while ((lock= lock_rec_get_next(heap_no, lock)));
+    }
+  }
+
+  goto func_exit;
+}
+#endif /* HAVE_REPLICATION */
+
+/** Wait for a lock to be released.
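+The calling thread suspends on trx->lock.cond and resumes when the lock is
+granted, the wait times out, the statement is interrupted, or the
+transaction is chosen as a deadlock victim.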
+@retval DB_DEADLOCK if this transaction was chosen as the deadlock victim
+@retval DB_INTERRUPTED if the execution was interrupted by the user
+@retval DB_LOCK_WAIT_TIMEOUT if the lock wait timed out
+@retval DB_SUCCESS if the lock was granted */
+dberr_t lock_wait(que_thr_t *thr)
+{
+  trx_t *trx= thr_get_trx(thr);
+
+#ifdef ENABLED_DEBUG_SYNC
+  if (trx->mysql_thd)
+    DEBUG_SYNC_C("lock_wait_start");
+
+  /* Create the sync point for any quit from the function. */
+  SCOPE_EXIT([trx]() {
+    if (trx->mysql_thd)
+      DEBUG_SYNC_C("lock_wait_end");
+  });
+#endif
+
+  /* InnoDB system transactions may use the global value of
+  innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
+  const ulong innodb_lock_wait_timeout= trx_lock_wait_timeout_get(trx);
+  const my_hrtime_t suspend_time= my_hrtime_coarse();
+  ut_ad(!trx->dict_operation_lock_mode);
+
+  /* The wait_lock can be cleared by another thread in lock_grant(),
+  lock_rec_cancel(), or lock_cancel_waiting_and_release(), which could be
+  invoked from the high-level function lock_sys_t::cancel().
+  But, a wait can only be initiated by the current thread which owns
+  the transaction.
+
+  Even if trx->lock.wait_lock were changed, the object that it used to
+  point to will remain valid memory (remain allocated from
+  trx->lock.lock_heap). If trx->lock.wait_lock was set to nullptr, the
+  original object could be transformed to a granted lock. On a page
+  split or merge, we would change trx->lock.wait_lock to point to
+  another waiting lock request object, and the old object would be
+  logically discarded.
+
+  In any case, it is safe to read the memory that wait_lock points to,
+  even though we are not holding any mutex. We are only reading
+  wait_lock->type_mode & (LOCK_TABLE | LOCK_AUTO_INC), which will be
+  unaffected by any page split or merge operation. (Furthermore,
+  table lock objects will never be cloned or moved.) */
+  lock_t *wait_lock= trx->lock.wait_lock;
+
+  if (!wait_lock)
+  {
+    /* The lock has already been released or this transaction
+    was chosen as a deadlock victim: no need to wait */
+    if (trx->lock.was_chosen_as_deadlock_victim)
+      trx->error_state= DB_DEADLOCK;
+    else if (trx->error_state == DB_LOCK_WAIT)
+      trx->error_state= DB_SUCCESS;
+    return trx->error_state;
+  }
+
+  /* Because we are not holding exclusive lock_sys.latch, the
+  wait_lock may be changed by other threads during a page split or
+  merge in case it is a record lock.
+
+  Because at this point we are not holding lock_sys.wait_mutex either,
+  another thread may set trx->lock.wait_lock == nullptr at any time. */
+
+  trx->lock.suspend_time= suspend_time;
+
+  ut_ad(!trx->dict_operation_lock_mode);
+
+  IF_WSREP(if (trx->is_wsrep()) lock_wait_wsrep(trx),);
+
+  const auto type_mode= wait_lock->type_mode;
+#ifdef HAVE_REPLICATION
+  /* Even though lock_wait_rpl_report() has nothing to do with
+  deadlock detection, it was always disabled by innodb_deadlock_detect=OFF.
+  We will keep it that way, because unfortunately
+  thd_need_wait_reports() will hold even if parallel (or any) replication
+  is not being used. We want to allow the user to skip
+  lock_wait_rpl_report(). */
+  const bool rpl= trx->mysql_thd && innodb_deadlock_detect &&
+    thd_need_wait_reports(trx->mysql_thd);
+#endif
+  const bool row_lock_wait= thr->lock_state == QUE_THR_LOCK_ROW;
+  timespec abstime;
+  set_timespec_time_nsec(abstime, suspend_time.val * 1000);
+  abstime.MY_tv_sec+= innodb_lock_wait_timeout;
+  /* Dictionary transactions must be immune to lock wait timeouts
+  for locks on data dictionary tables. Here we check only for
+  SYS_TABLES, SYS_COLUMNS, SYS_INDEXES, SYS_FIELDS. Locks on further
+  tables SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_VIRTUAL will only be
+  acquired while holding an exclusive lock on one of the 4 tables. */
+  const bool no_timeout= innodb_lock_wait_timeout >= 100000000 ||
+    ((type_mode & LOCK_TABLE) &&
+     wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID);
+  thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE)
+                 ? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK);
+
+  mysql_mutex_lock(&lock_sys.wait_mutex);
+  /* Now that we are holding lock_sys.wait_mutex, we must reload
+  trx->lock.wait_lock. It cannot be cleared as long as we are holding
+  lock_sys.wait_mutex, but as long as we do not hold exclusive
+  lock_sys.latch, a waiting record lock can be replaced with an
+  equivalent waiting record lock during a page split or merge by
+  another thread. See lock_sys_t::cancel(). */
+  wait_lock= trx->lock.wait_lock;
+
+  if (wait_lock)
+  {
+    /* Dictionary transactions must ignore KILL, because they could
+    be executed as part of a multi-transaction DDL operation,
+    such as rollback_inplace_alter_table() or ha_innobase::delete_table(). */
+    if (!trx->dict_operation && trx_is_interrupted(trx))
+    {
+      /* innobase_kill_query() can only set trx->error_state=DB_INTERRUPTED
+      for any transaction that is attached to a connection.
+
+      Furthermore, innobase_kill_query() could have been invoked before
+      this thread entered a lock wait. The thd_kill_level() or thd::killed
+      is only being checked every now and then. */
+      trx->error_state= DB_INTERRUPTED;
+      goto abort_wait;
+    }
+
+    wait_lock= Deadlock::check_and_resolve(trx, wait_lock);
+
+    if (wait_lock == reinterpret_cast<lock_t*>(-1))
+    {
+      trx->error_state= DB_DEADLOCK;
+      goto end_wait;
+    }
+  }
+  else
+  {
+    /* trx->lock.was_chosen_as_deadlock_victim can be changed before
+    lock_sys.wait_mutex is acquired, so let's check it once more. */
+    if (trx->lock.was_chosen_as_deadlock_victim)
+      trx->error_state= DB_DEADLOCK;
+    else if (trx->error_state == DB_LOCK_WAIT)
+      trx->error_state= DB_SUCCESS;
+    goto end_wait;
+  }
+  if (row_lock_wait)
+    lock_sys.wait_start();
+
+#ifdef HAVE_REPLICATION
+  if (rpl)
+    wait_lock= lock_wait_rpl_report(trx);
+#endif
+
+  switch (trx->error_state) {
+  case DB_SUCCESS:
+    break;
+  case DB_LOCK_WAIT:
+    trx->error_state= DB_SUCCESS;
+    break;
+  default:
+#ifdef UNIV_DEBUG
+    ut_ad("invalid state" == 0);
+    break;
+  case DB_DEADLOCK:
+  case DB_INTERRUPTED:
+#endif
+    goto end_loop;
+  }
+
+  while (wait_lock)
+  {
+    int err;
+    ut_ad(trx->lock.wait_lock);
+
+    DEBUG_SYNC_C("lock_wait_before_suspend");
+
+    if (no_timeout)
+    {
+      my_cond_wait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex);
+      err= 0;
+    }
+    else
+      err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex,
+                             &abstime);
+
+    wait_lock= trx->lock.wait_lock;
+
+    switch (trx->error_state) {
+    case DB_DEADLOCK:
+    case DB_INTERRUPTED:
+      break;
+#ifdef UNIV_DEBUG
+    case DB_LOCK_WAIT_TIMEOUT:
+    case DB_LOCK_WAIT:
+      ut_ad("invalid state" == 0);
+      break;
+#endif
+    default:
+      /* Dictionary transactions must ignore KILL, because they could
+      be executed as part of a multi-transaction DDL operation,
+      such as rollback_inplace_alter_table() or ha_innobase::delete_table(). */
+      if (!trx->dict_operation && trx_is_interrupted(trx))
+        /* innobase_kill_query() can only set trx->error_state=DB_INTERRUPTED
+        for any transaction that is attached to a connection. */
+        trx->error_state= DB_INTERRUPTED;
+      else if (!err)
+        continue;
+#ifdef WITH_WSREP
+      else if (trx->is_wsrep() && wsrep_is_BF_lock_timeout(*trx));
+#endif
+      else
+      {
+        trx->error_state= DB_LOCK_WAIT_TIMEOUT;
+        lock_sys.timeouts++;
+      }
+    }
+    break;
+  }
+
+end_loop:
+  if (row_lock_wait)
+    lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse());
+
+  ut_ad(!wait_lock == !trx->lock.wait_lock);
+
+  if (wait_lock)
+  {
+abort_wait:
+    lock_sys_t::cancel<false>(trx, wait_lock);
+    lock_sys.deadlock_check();
+  }
+
+end_wait:
+  mysql_mutex_unlock(&lock_sys.wait_mutex);
+  DBUG_EXECUTE_IF("small_sleep_after_lock_wait",
+  {
+    if (!(type_mode & LOCK_TABLE) &&
+        (type_mode & LOCK_MODE_MASK) == LOCK_X &&
+        trx->error_state != DB_DEADLOCK && !trx_is_interrupted(trx)) {
+      my_sleep(20000);
+    }
+  });
+  thd_wait_end(trx->mysql_thd);
+
+#ifdef UNIV_DEBUG
+  switch (trx->error_state) {
+  case DB_SUCCESS:
+  case DB_DEADLOCK:
+  case DB_INTERRUPTED:
+  case DB_LOCK_WAIT_TIMEOUT:
+    break;
+  default:
+    ut_ad("invalid state" == 0);
+  }
+#endif
+
+  return trx->error_state;
+}
+
+
+/** Resume a lock wait */
+template <bool from_deadlock= false>
+void lock_wait_end(trx_t *trx)
+{
+  mysql_mutex_assert_owner(&lock_sys.wait_mutex);
+  ut_ad(trx->mutex_is_owner());
+  ut_d(const auto state= trx->state);
+  ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE ||
+        state == TRX_STATE_PREPARED);
+  /* lock_wait() checks the trx->lock.was_chosen_as_deadlock_victim flag
+  before requesting lock_sys.wait_mutex, and if the flag is set, it returns
+  an error, which causes a transaction rollback that can reset
+  trx->lock.wait_thr before deadlock resolution starts cancelling the
+  victim's waiting lock. That is why we do not check trx->lock.wait_thr
+  here if the function was called from the deadlock resolution code.
*/ + ut_ad(from_deadlock || trx->lock.wait_thr); + + if (trx->lock.was_chosen_as_deadlock_victim) + { + ut_ad(from_deadlock || state == TRX_STATE_ACTIVE); + trx->error_state= DB_DEADLOCK; + } + + trx->lock.wait_thr= nullptr; + pthread_cond_signal(&trx->lock.cond); +} + +/** Grant a waiting lock request and release the waiting transaction. */ +static void lock_grant(lock_t *lock) +{ + lock_reset_lock_and_trx_wait(lock); + trx_t *trx= lock->trx; + trx->mutex_lock(); + if (lock->mode() == LOCK_AUTO_INC) + { + dict_table_t *table= lock->un_member.tab_lock.table; + ut_ad(!table->autoinc_trx); + table->autoinc_trx= trx; + ib_vector_push(trx->autoinc_locks, &lock); + } + + DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends", trx->id)); + + /* If we are resolving a deadlock by choosing another transaction as + a victim, then our original transaction may not be waiting anymore */ + + if (trx->lock.wait_thr) + lock_wait_end(trx); + + trx->mutex_unlock(); +} + +/*************************************************************//** +Cancels a waiting record lock request and releases the waiting transaction +that requested it. NOTE: does NOT check if waiting lock requests behind this +one can now be granted! */ +static void lock_rec_cancel(lock_t *lock) +{ + trx_t *trx= lock->trx; + mysql_mutex_lock(&lock_sys.wait_mutex); + trx->mutex_lock(); + + ut_d(lock_sys.hash_get(lock->type_mode). + assert_locked(lock->un_member.rec_lock.page_id)); + /* Reset the bit (there can be only one set bit) in the lock bitmap */ + lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); + + /* Reset the wait flag and the back pointer to lock in trx */ + lock_reset_lock_and_trx_wait(lock); + + /* The following releases the trx from lock wait */ + lock_wait_end(trx); + mysql_mutex_unlock(&lock_sys.wait_mutex); + trx->mutex_unlock(); +} + +/** Remove a record lock request, waiting or granted, from the queue and +grant locks to other transactions in the queue if they now are entitled +to a lock. NOTE: all record locks contained in in_lock are removed. +@param[in,out] in_lock record lock +@param[in] owns_wait_mutex whether lock_sys.wait_mutex is held */ +static void lock_rec_dequeue_from_page(lock_t *in_lock, bool owns_wait_mutex) +{ +#ifdef SAFE_MUTEX + ut_ad(owns_wait_mutex == mysql_mutex_is_owner(&lock_sys.wait_mutex)); +#endif /* SAFE_MUTEX */ + ut_ad(!in_lock->is_table()); + + const page_id_t page_id{in_lock->un_member.rec_lock.page_id}; + auto& lock_hash = lock_sys.hash_get(in_lock->type_mode); + ut_ad(lock_sys.is_writer() || in_lock->trx->mutex_is_owner()); + + ut_d(auto old_n_locks=) + in_lock->index->table->n_rec_locks--; + ut_ad(old_n_locks); + + const ulint rec_fold = page_id.fold(); + hash_cell_t &cell = *lock_hash.cell_get(rec_fold); + lock_sys.assert_locked(cell); + + HASH_DELETE(lock_t, hash, &lock_hash, rec_fold, in_lock); + ut_ad(lock_sys.is_writer() || in_lock->trx->mutex_is_owner()); + UT_LIST_REMOVE(in_lock->trx->lock.trx_locks, in_lock); + + MONITOR_INC(MONITOR_RECLOCK_REMOVED); + MONITOR_DEC(MONITOR_NUM_RECLOCK); + + bool acquired = false; + + /* Check if waiting locks in the queue can now be granted: + grant locks if there are no conflicting locks ahead. Stop at + the first X lock that is waiting or has been granted. 
+  */
+
+  for (lock_t* lock = lock_sys_t::get_first(cell, page_id);
+       lock != NULL;
+       lock = lock_rec_get_next_on_page(lock)) {
+
+    if (!lock->is_waiting()) {
+      continue;
+    }
+
+    if (!owns_wait_mutex) {
+      mysql_mutex_lock(&lock_sys.wait_mutex);
+      acquired = owns_wait_mutex = true;
+    }
+
+    ut_ad(lock->trx->lock.wait_trx);
+    ut_ad(lock->trx->lock.wait_lock);
+
+    if (const lock_t* c = lock_rec_has_to_wait_in_queue(
+          cell, lock)) {
+      trx_t* c_trx = c->trx;
+      lock->trx->lock.wait_trx = c_trx;
+      if (c_trx->lock.wait_trx
+          && innodb_deadlock_detect
+          && Deadlock::to_check.emplace(c_trx).second) {
+        Deadlock::to_be_checked = true;
+      }
+    } else {
+      /* Grant the lock */
+      ut_ad(lock->trx != in_lock->trx);
+      lock_grant(lock);
+    }
+  }
+
+  if (acquired) {
+    mysql_mutex_unlock(&lock_sys.wait_mutex);
+  }
+}
+
+/** Remove a record lock request, waiting or granted, on a discarded page
+@param lock_hash hash table
+@param in_lock lock object */
+TRANSACTIONAL_TARGET
+void lock_rec_discard(lock_sys_t::hash_table &lock_hash, lock_t *in_lock)
+{
+  ut_ad(!in_lock->is_table());
+  lock_hash.assert_locked(in_lock->un_member.rec_lock.page_id);
+
+  HASH_DELETE(lock_t, hash, &lock_hash,
+              in_lock->un_member.rec_lock.page_id.fold(), in_lock);
+  ut_d(uint32_t old_locks);
+  {
+    trx_t *trx= in_lock->trx;
+    TMTrxGuard tg{*trx};
+    ut_d(old_locks=)
+    in_lock->index->table->n_rec_locks--;
+    UT_LIST_REMOVE(trx->lock.trx_locks, in_lock);
+  }
+  ut_ad(old_locks);
+  MONITOR_INC(MONITOR_RECLOCK_REMOVED);
+  MONITOR_DEC(MONITOR_NUM_RECLOCK);
+}
+
+/*************************************************************//**
+Removes record lock objects set on an index page which is discarded. This
+function does not move locks or check for waiting locks; therefore, the
+lock bitmaps must already be reset when this function is called. */
+static void
+lock_rec_free_all_from_discard_page(page_id_t id, const hash_cell_t &cell,
+                                    lock_sys_t::hash_table &lock_hash)
+{
+  for (lock_t *lock= lock_sys_t::get_first(cell, id); lock; )
+  {
+    ut_ad(&lock_hash != &lock_sys.rec_hash ||
+          lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
+    ut_ad(!lock->is_waiting());
+    lock_t *next_lock= lock_rec_get_next_on_page(lock);
+    lock_rec_discard(lock_hash, lock);
+    lock= next_lock;
+  }
+}
+
+/** Discard locks for an index when purging DELETE FROM SYS_INDEXES
+after an aborted CREATE INDEX operation.
+@param index a stale index on which ADD INDEX operation was aborted */
+ATTRIBUTE_COLD void lock_discard_for_index(const dict_index_t &index)
+{
+  ut_ad(!index.is_committed());
+  /* This is very rarely executed code, and the size of the hash array
+  would exceed the maximum size of a memory transaction. */
+  LockMutexGuard g{SRW_LOCK_CALL};
+  const ulint n= lock_sys.rec_hash.pad(lock_sys.rec_hash.n_cells);
+  for (ulint i= 0; i < n; i++)
+  {
+    for (lock_t *lock= static_cast<lock_t*>(lock_sys.rec_hash.array[i].node);
+         lock; )
+    {
+      ut_ad(!lock->is_table());
+      if (lock->index == &index)
+      {
+        ut_ad(!lock->is_waiting());
+        lock_rec_discard(lock_sys.rec_hash, lock);
+        lock= static_cast<lock_t*>(lock_sys.rec_hash.array[i].node);
+      }
+      else
+        lock= lock->hash;
+    }
+  }
+}
+
+/*============= RECORD LOCK MOVING AND INHERITING ===================*/
+
+/*************************************************************//**
+Resets the lock bits for a single record. Releases transactions waiting for
+lock requests here.
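+Note that a waiting lock request is released via lock_rec_cancel(), which
+does not grant other requests that may be waiting behind it.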
+*/
+TRANSACTIONAL_TARGET
+static
+void
+lock_rec_reset_and_release_wait(const hash_cell_t &cell, const page_id_t id,
+                                ulint heap_no)
+{
+  for (lock_t *lock= lock_sys.get_first(cell, id, heap_no); lock;
+       lock= lock_rec_get_next(heap_no, lock))
+  {
+    if (lock->is_waiting())
+      lock_rec_cancel(lock);
+    else
+    {
+      TMTrxGuard tg{*lock->trx};
+      lock_rec_reset_nth_bit(lock, heap_no);
+    }
+  }
+}
+
+/** Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of
+the other record. Also waiting lock requests on rec are inherited as
+GRANTED gap locks.
+@param heir_cell heir hash table cell
+@param heir page containing the record which inherits
+@param donor_cell donor hash table cell
+@param donor page containing the record from which inherited; does NOT
+             reset the locks on this record
+@param heir_page heir page frame
+@param heir_heap_no heap_no of the inheriting record
+@param heap_no heap_no of the donating record
+@tparam from_split true if the function is invoked from
+                   lock_update_split_(left|right)(); in this case not-gap
+                   locks are not inherited to the supremum if the transaction
+                   isolation level is less than or equal to READ COMMITTED */
+template <bool from_split= false>
+static void
+lock_rec_inherit_to_gap(hash_cell_t &heir_cell, const page_id_t heir,
+                        const hash_cell_t &donor_cell, const page_id_t donor,
+                        const page_t *heir_page, ulint heir_heap_no,
+                        ulint heap_no)
+{
+  ut_ad(!from_split || heir_heap_no == PAGE_HEAP_NO_SUPREMUM);
+
+  /* At READ UNCOMMITTED or READ COMMITTED isolation level,
+  we do not want locks set
+  by an UPDATE or a DELETE to be inherited as gap type locks. But we
+  DO want S-locks/X-locks (taken for replace) set by a consistency
+  constraint to be inherited also then. */
+
+  for (lock_t *lock= lock_sys_t::get_first(donor_cell, donor, heap_no); lock;
+       lock= lock_rec_get_next(heap_no, lock))
+  {
+    trx_t *lock_trx= lock->trx;
+    if (!lock->trx->is_not_inheriting_locks() &&
+        !lock->is_insert_intention() &&
+        (lock_trx->isolation_level > TRX_ISO_READ_COMMITTED ||
+         /* When we are in a page split (not purge), then we don't set a lock
+         on the supremum if the donor lock type is LOCK_REC_NOT_GAP. That is,
+         do not create bogus gap locks for non-gap locks for READ UNCOMMITTED
+         and READ COMMITTED isolation levels. LOCK_ORDINARY and
+         LOCK_GAP require a gap before the record to be locked; that is why
+         setting a lock on the supremum is necessary. */
+         ((!from_split || !lock->is_record_not_gap()) &&
+          lock->mode() != (lock_trx->duplicates ? LOCK_S : LOCK_X))))
+    {
+      lock_rec_add_to_queue(LOCK_GAP | lock->mode(), heir_cell, heir,
+                            heir_page, heir_heap_no, lock->index, lock_trx,
+                            false);
+    }
+  }
+}
+
+/*************************************************************//**
+Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
+of another record as gap type locks, but does not reset the lock bits of the
+other record. Also waiting lock requests are inherited as GRANTED gap locks.
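+Unlike lock_rec_inherit_to_gap(), inheritance is skipped here when the lock
+owner also holds an explicit LOCK_X table lock (see the lock_table_has()
+check in the loop).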
*/ +static +void +lock_rec_inherit_to_gap_if_gap_lock( +/*================================*/ + const buf_block_t* block, /*!< in: buffer block */ + ulint heir_heap_no, /*!< in: heap_no of + record which inherits */ + ulint heap_no) /*!< in: heap_no of record + from which inherited; + does NOT reset the locks + on this record */ +{ + const page_id_t id{block->page.id()}; + LockGuard g{lock_sys.rec_hash, id}; + + for (lock_t *lock= lock_sys_t::get_first(g.cell(), id, heap_no); lock; + lock= lock_rec_get_next(heap_no, lock)) + if (!lock->trx->is_not_inheriting_locks() && + !lock->is_insert_intention() && (heap_no == PAGE_HEAP_NO_SUPREMUM || + !lock->is_record_not_gap()) && + !lock_table_has(lock->trx, lock->index->table, LOCK_X)) + lock_rec_add_to_queue(LOCK_GAP | lock->mode(), + g.cell(), id, block->page.frame, + heir_heap_no, lock->index, lock->trx, false); +} + +/*************************************************************//** +Moves the locks of a record to another record and resets the lock bits of +the donating record. */ +TRANSACTIONAL_TARGET +static +void +lock_rec_move( + hash_cell_t& receiver_cell, /*!< in: hash table cell */ + const buf_block_t& receiver, /*!< in: buffer block containing + the receiving record */ + const page_id_t receiver_id, /*!< in: page identifier */ + const hash_cell_t& donator_cell, /*!< in: hash table cell */ + const page_id_t donator_id, /*!< in: page identifier of + the donating record */ + ulint receiver_heap_no,/*!< in: heap_no of the record + which gets the locks; there + must be no lock requests + on it! */ + ulint donator_heap_no)/*!< in: heap_no of the record + which gives the locks */ +{ + ut_ad(!lock_sys_t::get_first(receiver_cell, + receiver_id, receiver_heap_no)); + + for (lock_t *lock = lock_sys_t::get_first(donator_cell, donator_id, + donator_heap_no); + lock != NULL; + lock = lock_rec_get_next(donator_heap_no, lock)) { + const auto type_mode = lock->type_mode; + if (type_mode & LOCK_WAIT) { + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode &= ~LOCK_WAIT; + } + + trx_t* lock_trx = lock->trx; + lock_trx->mutex_lock(); + lock_rec_reset_nth_bit(lock, donator_heap_no); + + /* Note that we FIRST reset the bit, and then set the lock: + the function works also if donator_id == receiver_id */ + + lock_rec_add_to_queue(type_mode, receiver_cell, + receiver_id, receiver.page.frame, + receiver_heap_no, + lock->index, lock_trx, true); + lock_trx->mutex_unlock(); + } + + ut_ad(!lock_sys_t::get_first(donator_cell, donator_id, + donator_heap_no)); +} + +/** Move all the granted locks to the front of the given lock list. +All the waiting locks will be at the end of the list. +@param[in,out] lock_list the given lock list. */ +static +void +lock_move_granted_locks_to_front( + UT_LIST_BASE_NODE_T(lock_t)& lock_list) +{ + lock_t* lock; + + bool seen_waiting_lock = false; + + for (lock = UT_LIST_GET_FIRST(lock_list); lock; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + + if (!seen_waiting_lock) { + if (lock->is_waiting()) { + seen_waiting_lock = true; + } + continue; + } + + ut_ad(seen_waiting_lock); + + if (!lock->is_waiting()) { + lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock); + ut_a(prev); + ut_list_move_to_front(lock_list, lock); + lock = prev; + } + } +} + +/*************************************************************//** +Updates the lock table when we have reorganized a page. 
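+Reorganization may renumber the heap numbers of the records, so the old
+lock bitmaps cannot be kept as they are: all locks on the page are copied
+aside, their bitmaps are reset, and the bits are set again by matching the
+old and the new version of the page record by record.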
NOTE: we copy +also the locks set on the infimum of the page; the infimum may carry +locks if an update of a record is occurring on the page, and its locks +were temporarily stored on the infimum. */ +TRANSACTIONAL_TARGET +void +lock_move_reorganize_page( +/*======================*/ + const buf_block_t* block, /*!< in: old index page, now + reorganized */ + const buf_block_t* oblock) /*!< in: copy of the old, not + reorganized page */ +{ + mem_heap_t *heap; + + { + UT_LIST_BASE_NODE_T(lock_t) old_locks; + UT_LIST_INIT(old_locks, &lock_t::trx_locks); + + const page_id_t id{block->page.id()}; + const auto id_fold= id.fold(); + { + TMLockGuard g{lock_sys.rec_hash, id}; + if (!lock_sys_t::get_first(g.cell(), id)) + return; + } + + /* We will modify arbitrary trx->lock.trx_locks. + Do not bother with a memory transaction; we are going + to allocate memory and copy a lot of data. */ + LockMutexGuard g{SRW_LOCK_CALL}; + hash_cell_t &cell= *lock_sys.rec_hash.cell_get(id_fold); + + /* Note: Predicate locks for SPATIAL INDEX are not affected by + page reorganize, because they do not refer to individual record + heap numbers. */ + lock_t *lock= lock_sys_t::get_first(cell, id); + + if (!lock) + return; + + heap= mem_heap_create(256); + + /* Copy first all the locks on the page to heap and reset the + bitmaps in the original locks; chain the copies of the locks + using the trx_locks field in them. */ + + do + { + /* Make a copy of the lock */ + lock_t *old_lock= lock_rec_copy(lock, heap); + + UT_LIST_ADD_LAST(old_locks, old_lock); + + /* Reset bitmap of lock */ + lock_rec_bitmap_reset(lock); + + if (lock->is_waiting()) + { + ut_ad(lock->trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; + } + + lock= lock_rec_get_next_on_page(lock); + } + while (lock); + + const ulint comp= page_is_comp(block->page.frame); + ut_ad(comp == page_is_comp(oblock->page.frame)); + + lock_move_granted_locks_to_front(old_locks); + + DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize", + ut_list_reverse(old_locks);); + + for (lock= UT_LIST_GET_FIRST(old_locks); lock; + lock= UT_LIST_GET_NEXT(trx_locks, lock)) + { + /* NOTE: we copy also the locks set on the infimum and + supremum of the page; the infimum may carry locks if an + update of a record is occurring on the page, and its locks + were temporarily stored on the infimum */ + const rec_t *rec1= page_get_infimum_rec(block->page.frame); + const rec_t *rec2= page_get_infimum_rec(oblock->page.frame); + + /* Set locks according to old locks */ + for (;;) + { + ulint old_heap_no; + ulint new_heap_no; + ut_d(const rec_t* const orec= rec1); + ut_ad(page_rec_is_metadata(rec1) == page_rec_is_metadata(rec2)); + + if (comp) + { + old_heap_no= rec_get_heap_no_new(rec2); + new_heap_no= rec_get_heap_no_new(rec1); + + rec1= page_rec_get_next_low(rec1, TRUE); + rec2= page_rec_get_next_low(rec2, TRUE); + } + else + { + old_heap_no= rec_get_heap_no_old(rec2); + new_heap_no= rec_get_heap_no_old(rec1); + ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2))); + + rec1= page_rec_get_next_low(rec1, FALSE); + rec2= page_rec_get_next_low(rec2, FALSE); + } + + trx_t *lock_trx= lock->trx; + lock_trx->mutex_lock(); + + /* Clear the bit in old_lock. */ + if (old_heap_no < lock->un_member.rec_lock.n_bits && + lock_rec_reset_nth_bit(lock, old_heap_no)) + { + ut_ad(!page_rec_is_metadata(orec)); + + /* NOTE that the old lock bitmap could be too + small for the new heap number! 
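+        That is presumably why the bit is set through lock_rec_add_to_queue(),
+        which can create a new lock object with a large enough bitmap,
+        instead of setting a bit in the old lock object directly.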
*/ + lock_rec_add_to_queue(lock->type_mode, cell, id, block->page.frame, + new_heap_no, lock->index, lock_trx, true); + } + + lock_trx->mutex_unlock(); + + if (!rec1 || !rec2) + { + ut_ad(!rec1 == !rec2); + ut_ad(new_heap_no == PAGE_HEAP_NO_SUPREMUM); + ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM); + break; + } + } + + ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED); + } + } + + mem_heap_free(heap); + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + if (fil_space_t *space= fil_space_t::get(id.space())) + { + ut_ad(lock_rec_validate_page(block, space->is_latched())); + space->release(); + } +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list end is moved to another page. */ +TRANSACTIONAL_TARGET +void +lock_move_rec_list_end( +/*===================*/ + const buf_block_t* new_block, /*!< in: index page to move to */ + const buf_block_t* block, /*!< in: index page */ + const rec_t* rec) /*!< in: record on page: this + is the first record moved */ +{ + const ulint comp= page_rec_is_comp(rec); + + ut_ad(block->page.frame == page_align(rec)); + ut_ad(comp == page_is_comp(new_block->page.frame)); + + const page_id_t id{block->page.id()}; + const page_id_t new_id{new_block->page.id()}; + { + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, id, new_id}; + + /* Note: when we move locks from record to record, waiting locks + and possible granted gap type locks behind them are enqueued in + the original order, because new elements are inserted to a hash + table to the end of the hash chain, and lock_rec_add_to_queue + does not reuse locks if there are waiters in the queue. */ + for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock; + lock= lock_rec_get_next_on_page(lock)) + { + const rec_t *rec1= rec; + const rec_t *rec2; + const auto type_mode= lock->type_mode; + + if (comp) + { + if (page_offset(rec1) == PAGE_NEW_INFIMUM) + rec1= page_rec_get_next_low(rec1, TRUE); + rec2= page_rec_get_next_low(new_block->page.frame + PAGE_NEW_INFIMUM, + TRUE); + } + else + { + if (page_offset(rec1) == PAGE_OLD_INFIMUM) + rec1= page_rec_get_next_low(rec1, FALSE); + rec2= page_rec_get_next_low(new_block->page.frame + PAGE_OLD_INFIMUM, + FALSE); + } + + if (UNIV_UNLIKELY(!rec1 || !rec2)) + { + ut_ad("corrupted page" == 0); + return; + } + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + for (;;) + { + ut_ad(page_rec_is_metadata(rec1) == page_rec_is_metadata(rec2)); + ut_d(const rec_t* const orec= rec1); + + ulint rec1_heap_no; + ulint rec2_heap_no; + + if (comp) + { + rec1_heap_no= rec_get_heap_no_new(rec1); + if (!(rec1= page_rec_get_next_low(rec1, TRUE))) + { + ut_ad(rec1_heap_no == PAGE_HEAP_NO_SUPREMUM); + break; + } + rec2_heap_no= rec_get_heap_no_new(rec2); + rec2= page_rec_get_next_low(rec2, TRUE); + } + else + { + ut_d(const rec_t *old1= rec1); + rec1_heap_no= rec_get_heap_no_old(rec1); + if (!(rec1= page_rec_get_next_low(rec1, FALSE))) + { + ut_ad(rec1_heap_no == PAGE_HEAP_NO_SUPREMUM); + break; + } + + ut_ad(rec_get_data_size_old(old1) == rec_get_data_size_old(rec2)); + ut_ad(!memcmp(old1, rec2, rec_get_data_size_old(old1))); + + rec2_heap_no= rec_get_heap_no_old(rec2); + rec2= page_rec_get_next_low(rec2, FALSE); + } + + if (UNIV_UNLIKELY(!rec2)) + { + ut_ad("corrupted page" == 0); + return; + } + + trx_t *lock_trx= lock->trx; + lock_trx->mutex_lock(); + + if (rec1_heap_no < lock->un_member.rec_lock.n_bits && + 
lock_rec_reset_nth_bit(lock, rec1_heap_no)) + { + ut_ad(!page_rec_is_metadata(orec)); + + if (type_mode & LOCK_WAIT) + { + ut_ad(lock_trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; + } + + lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, + rec2_heap_no, lock->index, lock_trx, true); + } + + lock_trx->mutex_unlock(); + } + } + } + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + if (fil_space_t *space= fil_space_t::get(id.space())) + { + const bool is_latched{space->is_latched()}; + ut_ad(lock_rec_validate_page(block, is_latched)); + ut_ad(lock_rec_validate_page(new_block, is_latched)); + space->release(); + } +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list start is moved to another page. */ +TRANSACTIONAL_TARGET +void +lock_move_rec_list_start( +/*=====================*/ + const buf_block_t* new_block, /*!< in: index page to + move to */ + const buf_block_t* block, /*!< in: index page */ + const rec_t* rec, /*!< in: record on page: + this is the first + record NOT copied */ + const rec_t* old_end) /*!< in: old + previous-to-last + record on new_page + before the records + were copied */ +{ + const ulint comp= page_rec_is_comp(rec); + + ut_ad(block->page.frame == page_align(rec)); + ut_ad(comp == page_is_comp(new_block->page.frame)); + ut_ad(new_block->page.frame == page_align(old_end)); + ut_ad(!page_rec_is_metadata(rec)); + const page_id_t id{block->page.id()}; + const page_id_t new_id{new_block->page.id()}; + + { + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, id, new_id}; + + for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock; + lock= lock_rec_get_next_on_page(lock)) + { + const rec_t *rec1; + const rec_t *rec2; + const auto type_mode= lock->type_mode; + + if (comp) + { + rec1= page_rec_get_next_low(block->page.frame + PAGE_NEW_INFIMUM, + TRUE); + rec2= page_rec_get_next_low(old_end, TRUE); + } + else + { + rec1= page_rec_get_next_low(block->page.frame + PAGE_OLD_INFIMUM, + FALSE); + rec2= page_rec_get_next_low(old_end, FALSE); + } + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + while (rec1 != rec) + { + if (UNIV_UNLIKELY(!rec1 || !rec2)) + { + ut_ad("corrupted page" == 0); + return; + } + + ut_ad(page_rec_is_metadata(rec1) == page_rec_is_metadata(rec2)); + ut_d(const rec_t* const prev= rec1); + + ulint rec1_heap_no; + ulint rec2_heap_no; + + if (comp) + { + rec1_heap_no= rec_get_heap_no_new(rec1); + rec2_heap_no= rec_get_heap_no_new(rec2); + + rec1= page_rec_get_next_low(rec1, TRUE); + rec2= page_rec_get_next_low(rec2, TRUE); + } + else + { + rec1_heap_no= rec_get_heap_no_old(rec1); + rec2_heap_no= rec_get_heap_no_old(rec2); + + ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2))); + + rec1= page_rec_get_next_low(rec1, FALSE); + rec2= page_rec_get_next_low(rec2, FALSE); + } + + trx_t *lock_trx= lock->trx; + lock_trx->mutex_lock(); + + if (rec1_heap_no < lock->un_member.rec_lock.n_bits && + lock_rec_reset_nth_bit(lock, rec1_heap_no)) + { + ut_ad(!page_rec_is_metadata(prev)); + + if (type_mode & LOCK_WAIT) + { + ut_ad(lock_trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; + } + + lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, + rec2_heap_no, lock->index, lock_trx, true); + } + + lock_trx->mutex_unlock(); + } + +#ifdef UNIV_DEBUG + if (page_rec_is_supremum(rec)) + for (auto i= 
lock_rec_get_n_bits(lock); --i > PAGE_HEAP_NO_USER_LOW; ) + ut_ad(!lock_rec_get_nth_bit(lock, i)); +#endif /* UNIV_DEBUG */ + } + } + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); +#endif +} + +/*************************************************************//** +Moves the explicit locks on user records to another page if a record +list start is moved to another page. */ +TRANSACTIONAL_TARGET +void +lock_rtr_move_rec_list( +/*===================*/ + const buf_block_t* new_block, /*!< in: index page to + move to */ + const buf_block_t* block, /*!< in: index page */ + rtr_rec_move_t* rec_move, /*!< in: recording records + moved */ + ulint num_move) /*!< in: num of rec to move */ +{ + if (!num_move) + return; + + const ulint comp= page_rec_is_comp(rec_move[0].old_rec); + + ut_ad(block->page.frame == page_align(rec_move[0].old_rec)); + ut_ad(new_block->page.frame == page_align(rec_move[0].new_rec)); + ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec)); + const page_id_t id{block->page.id()}; + const page_id_t new_id{new_block->page.id()}; + + { + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, id, new_id}; + + for (lock_t *lock= lock_sys_t::get_first(g.cell1(), id); lock; + lock= lock_rec_get_next_on_page(lock)) + { + const rec_t *rec1; + const rec_t *rec2; + const auto type_mode= lock->type_mode; + + /* Copy lock requests on user records to new page and + reset the lock bits on the old */ + + for (ulint moved= 0; moved < num_move; moved++) + { + ulint rec1_heap_no; + ulint rec2_heap_no; + + rec1= rec_move[moved].old_rec; + rec2= rec_move[moved].new_rec; + ut_ad(!page_rec_is_metadata(rec1)); + ut_ad(!page_rec_is_metadata(rec2)); + + if (comp) + { + rec1_heap_no= rec_get_heap_no_new(rec1); + rec2_heap_no= rec_get_heap_no_new(rec2); + } + else + { + rec1_heap_no= rec_get_heap_no_old(rec1); + rec2_heap_no= rec_get_heap_no_old(rec2); + + ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2))); + } + + trx_t *lock_trx= lock->trx; + lock_trx->mutex_lock(); + + if (rec1_heap_no < lock->un_member.rec_lock.n_bits && + lock_rec_reset_nth_bit(lock, rec1_heap_no)) + { + if (type_mode & LOCK_WAIT) + { + ut_ad(lock_trx->lock.wait_lock == lock); + lock->type_mode&= ~LOCK_WAIT; + } + + lock_rec_add_to_queue(type_mode, g.cell2(), new_id, + new_block->page.frame, + rec2_heap_no, lock->index, lock_trx, true); + + rec_move[moved].moved= true; + } + + lock_trx->mutex_unlock(); + } + } + } + +#ifdef UNIV_DEBUG_LOCK_VALIDATE + ut_ad(lock_rec_validate_page(block)); +#endif +} +/*************************************************************//** +Updates the lock table when a page is split to the right. */ +void +lock_update_split_right( +/*====================*/ + const buf_block_t* right_block, /*!< in: right page */ + const buf_block_t* left_block) /*!< in: left page */ +{ + const ulint h= lock_get_min_heap_no(right_block); + const page_id_t l{left_block->page.id()}; + const page_id_t r{right_block->page.id()}; + + /* This would likely be too large for a memory transaction. 
*/ + LockMultiGuard g{lock_sys.rec_hash, l, r}; + + /* Move the locks on the supremum of the left page to the supremum + of the right page */ + + lock_rec_move(g.cell2(), *right_block, r, g.cell1(), l, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + + /* Inherit the locks to the supremum of left page from the successor + of the infimum on right page */ + lock_rec_inherit_to_gap<true>(g.cell1(), l, g.cell2(), r, + left_block->page.frame, PAGE_HEAP_NO_SUPREMUM, + h); +} + +void lock_update_node_pointer(const buf_block_t *left_block, + const buf_block_t *right_block) +{ + const ulint h= lock_get_min_heap_no(right_block); + const page_id_t l{left_block->page.id()}; + const page_id_t r{right_block->page.id()}; + LockMultiGuard g{lock_sys.rec_hash, l, r}; + + lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, right_block->page.frame, + h, PAGE_HEAP_NO_SUPREMUM); +} + +#ifdef UNIV_DEBUG +static void lock_assert_no_spatial(const page_id_t id) +{ + const auto id_fold= id.fold(); + auto cell= lock_sys.prdt_page_hash.cell_get(id_fold); + auto latch= lock_sys_t::hash_table::latch(cell); + latch->acquire(); + /* there should exist no page lock on the left page, + otherwise, it will be blocked from merge */ + ut_ad(!lock_sys_t::get_first(*cell, id)); + latch->release(); + cell= lock_sys.prdt_hash.cell_get(id_fold); + latch= lock_sys_t::hash_table::latch(cell); + latch->acquire(); + ut_ad(!lock_sys_t::get_first(*cell, id)); + latch->release(); +} +#endif + +/*************************************************************//** +Updates the lock table when a page is merged to the right. */ +void +lock_update_merge_right( +/*====================*/ + const buf_block_t* right_block, /*!< in: right page to + which merged */ + const rec_t* orig_succ, /*!< in: original + successor of infimum + on the right page + before merge */ + const buf_block_t* left_block) /*!< in: merged index + page which will be + discarded */ +{ + ut_ad(!page_rec_is_metadata(orig_succ)); + + const page_id_t l{left_block->page.id()}; + const page_id_t r{right_block->page.id()}; + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, l, r}; + + /* Inherit the locks from the supremum of the left page to the + original successor of infimum on the right page, to which the left + page was merged */ + lock_rec_inherit_to_gap(g.cell2(), r, g.cell1(), l, right_block->page.frame, + page_rec_get_heap_no(orig_succ), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, releasing + waiting transactions */ + lock_rec_reset_and_release_wait(g.cell1(), l, PAGE_HEAP_NO_SUPREMUM); + lock_rec_free_all_from_discard_page(l, g.cell1(), lock_sys.rec_hash); + + ut_d(lock_assert_no_spatial(l)); +} + +/** Update locks when the root page is copied to another in +btr_root_raise_and_insert(). Note that we leave lock structs on the +root page, even though they do not make sense on other than leaf +pages: the reason is that in a pessimistic update the infimum record +of the root page will act as a dummy carrier of the locks of the record +to be updated. */ +void lock_update_root_raise(const buf_block_t &block, const page_id_t root) +{ + const page_id_t id{block.page.id()}; + /* This would likely be too large for a memory transaction. 
*/ + LockMultiGuard g{lock_sys.rec_hash, id, root}; + /* Move the locks on the supremum of the root to the supremum of block */ + lock_rec_move(g.cell1(), block, id, g.cell2(), root, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); +} + +/** Update the lock table when a page is copied to another. +@param new_block the target page +@param old old page (not index root page) */ +void lock_update_copy_and_discard(const buf_block_t &new_block, page_id_t old) +{ + const page_id_t id{new_block.page.id()}; + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, id, old}; + /* Move the locks on the supremum of the old page to the supremum of new */ + lock_rec_move(g.cell1(), new_block, id, g.cell2(), old, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + lock_rec_free_all_from_discard_page(old, g.cell2(), lock_sys.rec_hash); +} + +/*************************************************************//** +Updates the lock table when a page is split to the left. */ +void +lock_update_split_left( +/*===================*/ + const buf_block_t* right_block, /*!< in: right page */ + const buf_block_t* left_block) /*!< in: left page */ +{ + ulint h= lock_get_min_heap_no(right_block); + const page_id_t l{left_block->page.id()}; + const page_id_t r{right_block->page.id()}; + LockMultiGuard g{lock_sys.rec_hash, l, r}; + /* Inherit the locks to the supremum of the left page from the + successor of the infimum on the right page */ + lock_rec_inherit_to_gap<true>(g.cell1(), l, g.cell2(), r, + left_block->page.frame, PAGE_HEAP_NO_SUPREMUM, + h); +} + +/** Update the lock table when a page is merged to the left. +@param left left page +@param orig_pred original predecessor of supremum on the left page before merge +@param right merged, to-be-discarded right page */ +void lock_update_merge_left(const buf_block_t& left, const rec_t *orig_pred, + const page_id_t right) +{ + ut_ad(left.page.frame == page_align(orig_pred)); + + const page_id_t l{left.page.id()}; + const rec_t *left_next_rec= page_rec_get_next_const(orig_pred); + if (UNIV_UNLIKELY(!left_next_rec)) + { + ut_ad("corrupted page" == 0); + return; + } + + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, l, right}; + if (!page_rec_is_supremum(left_next_rec)) + { + /* Inherit the locks on the supremum of the left page to the + first record which was moved from the right page */ + lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left.page.frame, + page_rec_get_heap_no(left_next_rec), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, + releasing waiting transactions */ + lock_rec_reset_and_release_wait(g.cell1(), l, PAGE_HEAP_NO_SUPREMUM); + } + + /* Move the locks from the supremum of right page to the supremum + of the left page */ + lock_rec_move(g.cell1(), left, l, g.cell2(), right, + PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); + lock_rec_free_all_from_discard_page(right, g.cell2(), lock_sys.rec_hash); + + /* there should exist no page lock on the right page, + otherwise, it will be blocked from merge */ + ut_d(lock_assert_no_spatial(right)); +} + +/*************************************************************//** +Resets the original locks on heir and replaces them with gap type locks +inherited from rec. 
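+Waiting requests on the heir record are cancelled first; only then are the
+donor record's locks inherited as gap locks.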
*/ +void +lock_rec_reset_and_inherit_gap_locks( +/*=================================*/ + const buf_block_t& heir_block, /*!< in: block containing the + record which inherits */ + const page_id_t donor, /*!< in: page containing the + record from which inherited; + does NOT reset the locks on + this record */ + ulint heir_heap_no, /*!< in: heap_no of the + inheriting record */ + ulint heap_no) /*!< in: heap_no of the + donating record */ +{ + const page_id_t heir{heir_block.page.id()}; + /* This is a rare operation and likely too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, heir, donor}; + lock_rec_reset_and_release_wait(g.cell1(), heir, heir_heap_no); + lock_rec_inherit_to_gap(g.cell1(), heir, g.cell2(), donor, + heir_block.page.frame, heir_heap_no, heap_no); +} + +/*************************************************************//** +Updates the lock table when a page is discarded. */ +void +lock_update_discard( +/*================*/ + const buf_block_t* heir_block, /*!< in: index page + which will inherit the locks */ + ulint heir_heap_no, /*!< in: heap_no of the record + which will inherit the locks */ + const buf_block_t* block) /*!< in: index page + which will be discarded */ +{ + const page_t* page = block->page.frame; + const rec_t* rec; + ulint heap_no; + const page_id_t heir(heir_block->page.id()); + const page_id_t page_id(block->page.id()); + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, heir, page_id}; + + if (lock_sys_t::get_first(g.cell2(), page_id)) { + ut_d(lock_assert_no_spatial(page_id)); + /* Inherit all the locks on the page to the record and + reset all the locks on the page */ + + if (page_is_comp(page)) { + rec = page + PAGE_NEW_INFIMUM; + + do { + heap_no = rec_get_heap_no_new(rec); + + lock_rec_inherit_to_gap(g.cell1(), heir, + g.cell2(), page_id, + heir_block->page.frame, + heir_heap_no, heap_no); + + lock_rec_reset_and_release_wait( + g.cell2(), page_id, heap_no); + + rec = page + rec_get_next_offs(rec, TRUE); + } while (heap_no != PAGE_HEAP_NO_SUPREMUM); + } else { + rec = page + PAGE_OLD_INFIMUM; + + do { + heap_no = rec_get_heap_no_old(rec); + + lock_rec_inherit_to_gap(g.cell1(), heir, + g.cell2(), page_id, + heir_block->page.frame, + heir_heap_no, heap_no); + + lock_rec_reset_and_release_wait( + g.cell2(), page_id, heap_no); + + rec = page + rec_get_next_offs(rec, FALSE); + } while (heap_no != PAGE_HEAP_NO_SUPREMUM); + } + + lock_rec_free_all_from_discard_page(page_id, g.cell2(), + lock_sys.rec_hash); + } else { + const auto fold = page_id.fold(); + auto cell = lock_sys.prdt_hash.cell_get(fold); + auto latch = lock_sys_t::hash_table::latch(cell); + latch->acquire(); + lock_rec_free_all_from_discard_page(page_id, *cell, + lock_sys.prdt_hash); + latch->release(); + cell = lock_sys.prdt_page_hash.cell_get(fold); + latch = lock_sys_t::hash_table::latch(cell); + latch->acquire(); + lock_rec_free_all_from_discard_page(page_id, *cell, + lock_sys.prdt_page_hash); + latch->release(); + } +} + +/*************************************************************//** +Updates the lock table when a new user record is inserted. 
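+The new record inherits, in gap mode, the gap-type locks of its successor,
+so the gap that was protected before the insert remains protected after it
+has been split in two.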
*/ +void +lock_update_insert( +/*===============*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: the inserted record */ +{ + ulint receiver_heap_no; + ulint donator_heap_no; + + ut_ad(block->page.frame == page_align(rec)); + ut_ad(!page_rec_is_metadata(rec)); + + /* Inherit the gap-locking locks for rec, in gap mode, from the next + record */ + + if (page_rec_is_comp(rec)) { + receiver_heap_no = rec_get_heap_no_new(rec); + rec = page_rec_get_next_low(rec, TRUE); + if (UNIV_UNLIKELY(!rec)) { + return; + } + donator_heap_no = rec_get_heap_no_new(rec); + } else { + receiver_heap_no = rec_get_heap_no_old(rec); + rec = page_rec_get_next_low(rec, FALSE); + if (UNIV_UNLIKELY(!rec)) { + return; + } + donator_heap_no = rec_get_heap_no_old(rec); + } + + lock_rec_inherit_to_gap_if_gap_lock( + block, receiver_heap_no, donator_heap_no); +} + +/*************************************************************//** +Updates the lock table when a record is removed. */ +void +lock_update_delete( +/*===============*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: the record to be removed */ +{ + const page_t* page = block->page.frame; + ulint heap_no; + ulint next_heap_no; + + ut_ad(page == page_align(rec)); + ut_ad(!page_rec_is_metadata(rec)); + + if (page_is_comp(page)) { + heap_no = rec_get_heap_no_new(rec); + next_heap_no = rec_get_heap_no_new(page + + rec_get_next_offs(rec, + TRUE)); + } else { + heap_no = rec_get_heap_no_old(rec); + next_heap_no = rec_get_heap_no_old(page + + rec_get_next_offs(rec, + FALSE)); + } + + const page_id_t id{block->page.id()}; + LockGuard g{lock_sys.rec_hash, id}; + + /* Let the next record inherit the locks from rec, in gap mode */ + + lock_rec_inherit_to_gap(g.cell(), id, g.cell(), id, block->page.frame, + next_heap_no, heap_no); + + /* Reset the lock bits on rec and release waiting transactions */ + lock_rec_reset_and_release_wait(g.cell(), id, heap_no); +} + +/*********************************************************************//** +Stores on the page infimum record the explicit locks of another record. +This function is used to store the lock state of a record when it is +updated and the size of the record changes in the update. The record +is moved in such an update, perhaps to another page. The infimum record +acts as a dummy carrier record, taking care of lock releases while the +actual record is being moved. */ +void +lock_rec_store_on_page_infimum( +/*===========================*/ + const buf_block_t* block, /*!< in: buffer block containing rec */ + const rec_t* rec) /*!< in: record whose lock state + is stored on the infimum + record of the same page; lock + bits are reset on the + record */ +{ + const ulint heap_no= page_rec_get_heap_no(rec); + + ut_ad(block->page.frame == page_align(rec)); + const page_id_t id{block->page.id()}; +#ifdef ENABLED_DEBUG_SYNC + SCOPE_EXIT([]() { DEBUG_SYNC_C("lock_rec_store_on_page_infimum_end"); }); +#endif + + LockGuard g{lock_sys.rec_hash, id}; + lock_rec_move(g.cell(), *block, id, g.cell(), id, + PAGE_HEAP_NO_INFIMUM, heap_no); +} + +/** Restore the explicit lock requests on a single record, where the +state was stored on the infimum of a page. 
+@param block buffer block containing rec +@param rec record whose lock state is restored +@param donator page (rec is not necessarily on this page) +whose infimum stored the lock state; lock bits are reset on the infimum */ +void lock_rec_restore_from_page_infimum(const buf_block_t &block, + const rec_t *rec, page_id_t donator) +{ + const ulint heap_no= page_rec_get_heap_no(rec); + const page_id_t id{block.page.id()}; + LockMultiGuard g{lock_sys.rec_hash, id, donator}; + lock_rec_move(g.cell1(), block, id, g.cell2(), donator, heap_no, + PAGE_HEAP_NO_INFIMUM); +} + +/*========================= TABLE LOCKS ==============================*/ + +/** +Create a table lock, without checking for deadlocks or lock compatibility. +@param table table on which the lock is created +@param type_mode lock type and mode +@param trx transaction +@param c_lock conflicting lock +@return the created lock object */ +lock_t *lock_table_create(dict_table_t *table, unsigned type_mode, trx_t *trx, + lock_t *c_lock) +{ + lock_t* lock; + + lock_sys.assert_locked(*table); + ut_ad(trx->mutex_is_owner()); + ut_ad(!trx->is_wsrep() || lock_sys.is_writer()); + ut_ad(trx->state == TRX_STATE_ACTIVE || trx->is_recovered); + ut_ad(!trx->is_autocommit_non_locking()); + /* During CREATE TABLE, we will write to newly created FTS_*_CONFIG + on which no lock has been created yet. */ + ut_ad(!trx->dict_operation_lock_mode + || (strstr(table->name.m_name, "/FTS_") + && strstr(table->name.m_name, "_CONFIG") + sizeof("_CONFIG") + == table->name.m_name + strlen(table->name.m_name) + 1)); + + switch (LOCK_MODE_MASK & type_mode) { + case LOCK_AUTO_INC: + ++table->n_waiting_or_granted_auto_inc_locks; + /* For AUTOINC locking we reuse the lock instance only if + there is no wait involved else we allocate the waiting lock + from the transaction lock heap. */ + if (type_mode == LOCK_AUTO_INC) { + lock = table->autoinc_lock; + + ut_ad(!table->autoinc_trx); + table->autoinc_trx = trx; + + ib_vector_push(trx->autoinc_locks, &lock); + goto allocated; + } + + break; + case LOCK_X: + case LOCK_S: + ++table->n_lock_x_or_s; + break; + } + + lock = trx->lock.table_cached < array_elements(trx->lock.table_pool) + ? &trx->lock.table_pool[trx->lock.table_cached++] + : static_cast<lock_t*>( + mem_heap_alloc(trx->lock.lock_heap, sizeof *lock)); + +allocated: + lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE); + lock->trx = trx; + + lock->un_member.tab_lock.table = table; + + ut_ad(table->get_ref_count() > 0 || !table->can_be_evicted); + + UT_LIST_ADD_LAST(trx->lock.trx_locks, lock); + + ut_list_append(table->locks, lock, TableLockGetNode()); + + if (type_mode & LOCK_WAIT) { + if (trx->lock.wait_trx) { + ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx); + ut_ad(trx->lock.wait_lock); + ut_ad((*trx->lock.wait_lock).trx == trx); + } else { + ut_ad(c_lock); + trx->lock.wait_trx = c_lock->trx; + ut_ad(!trx->lock.wait_lock); + } + trx->lock.wait_lock = lock; + } + + lock->trx->lock.table_locks.push_back(lock); + + MONITOR_INC(MONITOR_TABLELOCK_CREATED); + MONITOR_INC(MONITOR_NUM_TABLELOCK); + + return(lock); +} + +/*************************************************************//** +Pops autoinc lock requests from the transaction's autoinc_locks. We +handle the case where there are gaps in the array and they need to +be popped off the stack. 
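+The gaps are NULL entries that lock_table_remove_autoinc_lock() leaves
+behind when it removes a lock from the middle of the vector.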
*/ +UNIV_INLINE +void +lock_table_pop_autoinc_locks( +/*=========================*/ + trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */ +{ + ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); + + /* Skip any gaps, gaps are NULL lock entries in the + trx->autoinc_locks vector. */ + + do { + ib_vector_pop(trx->autoinc_locks); + + if (ib_vector_is_empty(trx->autoinc_locks)) { + return; + } + + } while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL); +} + +/*************************************************************//** +Removes an autoinc lock request from the transaction's autoinc_locks. */ +UNIV_INLINE +void +lock_table_remove_autoinc_lock( +/*===========================*/ + lock_t* lock, /*!< in: table lock */ + trx_t* trx) /*!< in/out: transaction that owns the lock */ +{ + ut_ad(lock->type_mode == (LOCK_AUTO_INC | LOCK_TABLE)); + lock_sys.assert_locked(*lock->un_member.tab_lock.table); + ut_ad(trx->mutex_is_owner()); + + auto s = ib_vector_size(trx->autoinc_locks); + ut_ad(s); + + /* With stored functions and procedures the user may drop + a table within the same "statement". This special case has + to be handled by deleting only those AUTOINC locks that were + held by the table being dropped. */ + + lock_t* autoinc_lock = *static_cast<lock_t**>( + ib_vector_get(trx->autoinc_locks, --s)); + + /* This is the default fast case. */ + + if (autoinc_lock == lock) { + lock_table_pop_autoinc_locks(trx); + } else { + /* The last element should never be NULL */ + ut_a(autoinc_lock != NULL); + + /* Handle freeing the locks from within the stack. */ + + while (s) { + autoinc_lock = *static_cast<lock_t**>( + ib_vector_get(trx->autoinc_locks, --s)); + + if (autoinc_lock == lock) { + void* null_var = NULL; + ib_vector_set(trx->autoinc_locks, s, &null_var); + return; + } + } + + /* Must find the autoinc lock. */ + ut_error; + } +} + +/*************************************************************//** +Removes a table lock request from the queue and the trx list of locks; +this is a low-level function which does NOT check if waiting requests +can now be granted. */ +UNIV_INLINE +const dict_table_t* +lock_table_remove_low( +/*==================*/ + lock_t* lock) /*!< in/out: table lock */ +{ + ut_ad(lock->is_table()); + + trx_t* trx; + dict_table_t* table; + + ut_ad(lock->is_table()); + trx = lock->trx; + table = lock->un_member.tab_lock.table; + lock_sys.assert_locked(*table); + ut_ad(trx->mutex_is_owner()); + + /* Remove the table from the transaction's AUTOINC vector, if + the lock that is being released is an AUTOINC lock. */ + switch (lock->mode()) { + case LOCK_AUTO_INC: + ut_ad((table->autoinc_trx == trx) == !lock->is_waiting()); + + if (table->autoinc_trx == trx) { + table->autoinc_trx = NULL; + /* The locks must be freed in the reverse order from + the one in which they were acquired. This is to avoid + traversing the AUTOINC lock vector unnecessarily. + + We only store locks that were granted in the + trx->autoinc_locks vector (see lock_table_create() + and lock_grant()). 
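+    A waiting AUTO_INC lock is therefore never in this vector; that is why
+    removal is attempted above only when table->autoinc_trx == trx, i.e.
+    when this lock had been granted.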
 */
+			lock_table_remove_autoinc_lock(lock, trx);
+		}
+
+		ut_ad(table->n_waiting_or_granted_auto_inc_locks);
+		--table->n_waiting_or_granted_auto_inc_locks;
+		break;
+	case LOCK_X:
+	case LOCK_S:
+		ut_ad(table->n_lock_x_or_s);
+		--table->n_lock_x_or_s;
+		break;
+	default:
+		break;
+	}
+
+	UT_LIST_REMOVE(trx->lock.trx_locks, lock);
+	ut_list_remove(table->locks, lock, TableLockGetNode());
+
+	MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
+	MONITOR_DEC(MONITOR_NUM_TABLELOCK);
+	return table;
+}
+
+/*********************************************************************//**
+Enqueues a waiting request for a table lock which cannot be granted
+immediately. Checks for deadlocks.
+@retval DB_LOCK_WAIT if the waiting lock was enqueued
+@retval DB_DEADLOCK if this transaction was chosen as the victim */
+static
+dberr_t
+lock_table_enqueue_waiting(
+/*=======================*/
+	unsigned	mode,	/*!< in: lock mode this transaction is
+				requesting */
+	dict_table_t*	table,	/*!< in/out: table */
+	que_thr_t*	thr,	/*!< in: query thread */
+	lock_t*		c_lock)	/*!< in: conflicting lock or NULL */
+{
+	lock_sys.assert_locked(*table);
+	ut_ad(!srv_read_only_mode);
+
+	trx_t* trx = thr_get_trx(thr);
+	ut_ad(trx->mutex_is_owner());
+	ut_ad(!trx->dict_operation_lock_mode);
+
+	/* Enqueue the lock request that will wait to be granted */
+	lock_table_create(table, mode | LOCK_WAIT, trx, c_lock);
+
+	trx->lock.wait_thr = thr;
+	/* Apart from Galera, only transactions that have a waiting lock
+	may be chosen as deadlock victims. Only one lock can be waited for
+	at a time, and a transaction is associated with a single thread.
+	That is why there must not be any waiting lock requests if the
+	transaction is a deadlock victim and it is not WSREP. A Galera
+	transaction abort can be invoked from MDL acquisition code when the
+	transaction does not have a waiting lock; that is why we only check
+	the deadlock victim bit here. */
+	ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));
+
+	MONITOR_INC(MONITOR_TABLELOCK_WAIT);
+	return(DB_LOCK_WAIT);
+}
+
+/*********************************************************************//**
+Checks if other transactions have an incompatible mode lock request in
+the lock queue.
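+
+For example, if another transaction holds LOCK_IX on the table, a LOCK_S
+request conflicts and that lock is returned, while a second LOCK_IX
+request would be compatible. When the request itself is IS or IX and no
+S or X locks exist on the table (table->n_lock_x_or_s == 0), the queue
+scan is skipped, because IS and IX requests can only conflict with S or
+X locks.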
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_table_other_has_incompatible(
+/*==============================*/
+	const trx_t*	trx,	/*!< in: transaction, or NULL if all
+				transactions should be included */
+	ulint		wait,	/*!< in: LOCK_WAIT if also
+				waiting locks are taken into
+				account, or 0 if not */
+	const dict_table_t*	table,	/*!< in: table */
+	lock_mode	mode)	/*!< in: lock mode */
+{
+	lock_sys.assert_locked(*table);
+
+	static_assert(LOCK_IS == 0, "compatibility");
+	static_assert(LOCK_IX == 1, "compatibility");
+
+	if (UNIV_LIKELY(mode <= LOCK_IX && !table->n_lock_x_or_s)) {
+		return(NULL);
+	}
+
+	for (lock_t* lock = UT_LIST_GET_LAST(table->locks);
+	     lock;
+	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
+
+		trx_t* lock_trx = lock->trx;
+
+		if (lock_trx != trx
+		    && !lock_mode_compatible(lock->mode(), mode)
+		    && (wait || !lock->is_waiting())) {
+			return(lock);
+		}
+	}
+
+	return(NULL);
+}
+
+/** Acquire or enqueue a table lock */
+static dberr_t lock_table_low(dict_table_t *table, lock_mode mode,
+                              que_thr_t *thr, trx_t *trx)
+{
+  DBUG_EXECUTE_IF("innodb_table_deadlock", return DB_DEADLOCK;);
+  lock_t *wait_for=
+    lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode);
+  dberr_t err= DB_SUCCESS;
+
+  trx->mutex_lock();
+
+  if (wait_for)
+    err= lock_table_enqueue_waiting(mode, table, thr, wait_for);
+  else
+    lock_table_create(table, mode, trx, nullptr);
+
+  trx->mutex_unlock();
+
+  return err;
+}
+
+#ifdef WITH_WSREP
+/** Acquire or enqueue a table lock in Galera replication mode. */
+ATTRIBUTE_NOINLINE
+static dberr_t lock_table_wsrep(dict_table_t *table, lock_mode mode,
+                                que_thr_t *thr, trx_t *trx)
+{
+  LockMutexGuard g{SRW_LOCK_CALL};
+  return lock_table_low(table, mode, thr, trx);
+}
+#endif
+
+/** Acquire a table lock.
+@param table table to be locked
+@param fktable pointer to table, in case of a FOREIGN key check
+@param mode lock mode
+@param thr SQL execution thread
+@retval DB_SUCCESS if the lock was acquired
+@retval DB_DEADLOCK if a deadlock occurred, or fktable && *fktable != table
+@retval DB_LOCK_WAIT if lock_wait() must be invoked */
+dberr_t lock_table(dict_table_t *table, dict_table_t *const*fktable,
+                   lock_mode mode, que_thr_t *thr)
+{
+  ut_ad(table);
+
+  if (!fktable && table->is_temporary())
+    return DB_SUCCESS;
+
+  ut_ad(fktable || table->get_ref_count() || !table->can_be_evicted);
+
+  trx_t *trx= thr_get_trx(thr);
+
+  /* Look for equal or stronger locks the same trx already has on the
+  table. No need to acquire LockMutexGuard here because only the
+  thread that is executing a transaction can access trx_t::table_locks. */
+  if (lock_table_has(trx, table, mode) || srv_read_only_mode)
+    return DB_SUCCESS;
+
+  if ((mode == LOCK_IX || mode == LOCK_X) &&
+      !trx->read_only && !trx->rsegs.m_redo.rseg)
+    trx_set_rw_mode(trx);
+
+#ifdef WITH_WSREP
+  if (trx->is_wsrep())
+    return lock_table_wsrep(table, mode, thr, trx);
+#endif
+  lock_sys.rd_lock(SRW_LOCK_CALL);
+  dberr_t err;
+  if (fktable != nullptr && *fktable != table)
+    err= DB_DEADLOCK;
+  else
+  {
+    table->lock_mutex_lock();
+    err= lock_table_low(table, mode, thr, trx);
+    table->lock_mutex_unlock();
+  }
+  lock_sys.rd_unlock();
+
+  return err;
+}
+
+/** Create a table lock object for a resurrected transaction.
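+An editorial sketch of the intended use during crash recovery (the
+container and field names here are hypothetical):
+
+  for (const resurrected_lock &r : recovered_table_locks)
+    lock_table_resurrect(r.table, trx, r.exclusive ? LOCK_X : LOCK_IX);
+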
+@param table table to be X-locked
+@param trx transaction
+@param mode LOCK_X or LOCK_IX */
+void lock_table_resurrect(dict_table_t *table, trx_t *trx, lock_mode mode)
+{
+  ut_ad(trx->is_recovered);
+  ut_ad(mode == LOCK_X || mode == LOCK_IX);
+
+  if (lock_table_has(trx, table, mode))
+    return;
+
+  {
+    /* This is executed at server startup while no connections
+    are allowed. Do not bother with lock elision. */
+    LockMutexGuard g{SRW_LOCK_CALL};
+    ut_ad(!lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode));
+
+    trx->mutex_lock();
+    lock_table_create(table, mode, trx);
+  }
+  trx->mutex_unlock();
+}
+
+/** Find a lock that a waiting table lock request still has to wait for. */
+static const lock_t *lock_table_has_to_wait_in_queue(const lock_t *wait_lock)
+{
+  ut_ad(wait_lock->is_waiting());
+  ut_ad(wait_lock->is_table());
+
+  dict_table_t *table= wait_lock->un_member.tab_lock.table;
+  lock_sys.assert_locked(*table);
+
+  static_assert(LOCK_IS == 0, "compatibility");
+  static_assert(LOCK_IX == 1, "compatibility");
+
+  if (UNIV_LIKELY(wait_lock->mode() <= LOCK_IX && !table->n_lock_x_or_s))
+    return nullptr;
+
+  for (const lock_t *lock= UT_LIST_GET_FIRST(table->locks); lock != wait_lock;
+       lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock))
+    if (lock_has_to_wait(wait_lock, lock))
+      return lock;
+
+  return nullptr;
+}
+
+/*************************************************************//**
+Removes a table lock request, waiting or granted, from the queue and grants
+locks to other transactions in the queue, if they are now entitled to a
+lock.
+@param[in,out]	in_lock		table lock
+@param[in]	owns_wait_mutex	whether lock_sys.wait_mutex is held */
+static void lock_table_dequeue(lock_t *in_lock, bool owns_wait_mutex)
+{
+#ifdef SAFE_MUTEX
+	ut_ad(owns_wait_mutex == mysql_mutex_is_owner(&lock_sys.wait_mutex));
+#endif
+	ut_ad(in_lock->trx->mutex_is_owner());
+	lock_t*	lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
+
+	const dict_table_t* table = lock_table_remove_low(in_lock);
+
+	static_assert(LOCK_IS == 0, "compatibility");
+	static_assert(LOCK_IX == 1, "compatibility");
+
+	if (UNIV_LIKELY(in_lock->mode() <= LOCK_IX && !table->n_lock_x_or_s)) {
+		return;
+	}
+
+	bool acquired = false;
+
+	/* Check if waiting locks in the queue can now be granted: grant
+	locks if there are no conflicting locks ahead. */
+
+	for (/* No op */;
+	     lock != NULL;
+	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
+		if (!lock->is_waiting()) {
+			continue;
+		}
+
+		if (!owns_wait_mutex) {
+			mysql_mutex_lock(&lock_sys.wait_mutex);
+			acquired = owns_wait_mutex = true;
+		}
+
+		ut_ad(lock->trx->lock.wait_trx);
+		ut_ad(lock->trx->lock.wait_lock);
+
+		if (const lock_t* c = lock_table_has_to_wait_in_queue(lock)) {
+			trx_t* c_trx = c->trx;
+			lock->trx->lock.wait_trx = c_trx;
+			if (c_trx->lock.wait_trx
+			    && innodb_deadlock_detect
+			    && Deadlock::to_check.emplace(c_trx).second) {
+				Deadlock::to_be_checked = true;
+			}
+		} else {
+			/* Grant the lock */
+			ut_ad(in_lock->trx != lock->trx);
+			in_lock->trx->mutex_unlock();
+			lock_grant(lock);
+			in_lock->trx->mutex_lock();
+		}
+	}
+
+	if (acquired) {
+		mysql_mutex_unlock(&lock_sys.wait_mutex);
+	}
+}
+
+
+/** Sets a lock on a table based on the given mode.
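+
+This wraps lock_table() in a minimal query graph so that it can be used
+outside normal SQL execution; for example, lock_sys_tables() below locks
+each data dictionary table with
+
+  err= lock_table_for_trx(dict_sys.sys_tables, trx, LOCK_X);
+
+and on DB_LOCK_WAIT the request is retried via row_mysql_handle_errors(),
+unless no_wait is set.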
+@param table table to lock +@param trx transaction +@param mode LOCK_X or LOCK_S +@param no_wait whether to skip handling DB_LOCK_WAIT +@return error code */ +dberr_t lock_table_for_trx(dict_table_t *table, trx_t *trx, lock_mode mode, + bool no_wait) +{ + mem_heap_t *heap= mem_heap_create(512); + sel_node_t *node= sel_node_create(heap); + que_thr_t *thr= pars_complete_graph_for_exec(node, trx, heap, nullptr); + thr->graph->state= QUE_FORK_ACTIVE; + + thr= static_cast<que_thr_t*> + (que_fork_get_first_thr(static_cast<que_fork_t*> + (que_node_get_parent(thr)))); + +run_again: + thr->run_node= thr; + thr->prev_node= thr->common.parent; + dberr_t err= lock_table(table, nullptr, mode, thr); + + switch (err) { + case DB_SUCCESS: + break; + case DB_LOCK_WAIT: + if (no_wait) + { + lock_sys.cancel_lock_wait_for_trx(trx); + break; + } + /* fall through */ + default: + trx->error_state= err; + if (row_mysql_handle_errors(&err, trx, thr, nullptr)) + goto run_again; + } + + que_graph_free(thr->graph); + trx->op_info= ""; + + return err; +} + +/** Exclusively lock the data dictionary tables. +@param trx dictionary transaction +@return error code +@retval DB_SUCCESS on success */ +dberr_t lock_sys_tables(trx_t *trx) +{ + dberr_t err; + if (!(err= lock_table_for_trx(dict_sys.sys_tables, trx, LOCK_X)) && + !(err= lock_table_for_trx(dict_sys.sys_columns, trx, LOCK_X)) && + !(err= lock_table_for_trx(dict_sys.sys_indexes, trx, LOCK_X)) && + !(err= lock_table_for_trx(dict_sys.sys_fields, trx, LOCK_X))) + { + if (dict_sys.sys_foreign) + err= lock_table_for_trx(dict_sys.sys_foreign, trx, LOCK_X); + if (!err && dict_sys.sys_foreign_cols) + err= lock_table_for_trx(dict_sys.sys_foreign_cols, trx, LOCK_X); + if (!err && dict_sys.sys_virtual) + err= lock_table_for_trx(dict_sys.sys_virtual, trx, LOCK_X); + } + return err; +} + +/** Rebuild waiting queue after first_lock for heap_no. The queue is rebuilt +close to the way lock_rec_dequeue_from_page() does it. +@param trx transaction that has set a lock, which caused the queue + rebuild +@param cell rec hash cell of first_lock +@param first_lock the lock after which waiting queue will be rebuilt +@param heap_no heap no of the record for which waiting queue to rebuild */ +static void lock_rec_rebuild_waiting_queue( +#if defined(UNIV_DEBUG) || !defined(DBUG_OFF) + trx_t *trx, +#endif /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */ + hash_cell_t &cell, lock_t *first_lock, ulint heap_no) +{ + lock_sys.assert_locked(cell); + + for (lock_t *lock= first_lock; lock != NULL; + lock= lock_rec_get_next(heap_no, lock)) + { + if (!lock->is_waiting()) + continue; + mysql_mutex_lock(&lock_sys.wait_mutex); + ut_ad(lock->trx->lock.wait_trx); + ut_ad(lock->trx->lock.wait_lock); + + if (const lock_t *c= lock_rec_has_to_wait_in_queue(cell, lock)) + lock->trx->lock.wait_trx= c->trx; + else + { + /* Grant the lock */ + ut_ad(trx != lock->trx); + lock_grant(lock); + } + mysql_mutex_unlock(&lock_sys.wait_mutex); + } +} + +/*=========================== LOCK RELEASE ==============================*/ + +/*************************************************************//** +Removes a granted record lock of a transaction from the queue and grants +locks to other transactions waiting in the queue if they now are entitled +to a lock. 
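+
+This is invoked, for example, when a READ COMMITTED transaction releases
+the lock on a row that turned out not to match its search condition
+(an editorial sketch; block and rec are the caller's):
+
+	lock_rec_unlock(trx, block->page.id(), rec, LOCK_X);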
*/ +TRANSACTIONAL_TARGET +void +lock_rec_unlock( +/*============*/ + trx_t* trx, /*!< in/out: transaction that has + set a record lock */ + const page_id_t id, /*!< in: page containing rec */ + const rec_t* rec, /*!< in: record */ + lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */ +{ + lock_t* first_lock; + lock_t* lock; + ulint heap_no; + + ut_ad(trx); + ut_ad(rec); + ut_ad(!trx->lock.wait_lock); + ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); + ut_ad(!page_rec_is_metadata(rec)); + + heap_no = page_rec_get_heap_no(rec); + + LockGuard g{lock_sys.rec_hash, id}; + + first_lock = lock_sys_t::get_first(g.cell(), id, heap_no); + + /* Find the last lock with the same lock_mode and transaction + on the record. */ + + for (lock = first_lock; lock != NULL; + lock = lock_rec_get_next(heap_no, lock)) { + if (lock->trx == trx && lock->mode() == lock_mode) { + goto released; + } + } + + { + ib::error err; + err << "Unlock row could not find a " << lock_mode + << " mode lock on the record. Current statement: "; + size_t stmt_len; + if (const char* stmt = innobase_get_stmt_unsafe( + trx->mysql_thd, &stmt_len)) { + err.write(stmt, stmt_len); + } + } + + return; + +released: + ut_a(!lock->is_waiting()); + { + TMTrxGuard tg{*trx}; + lock_rec_reset_nth_bit(lock, heap_no); + } + + /* Check if we can now grant waiting lock requests */ + lock_rec_rebuild_waiting_queue( +#if defined(UNIV_DEBUG) || !defined(DBUG_OFF) + trx, +#endif /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */ + g.cell(), first_lock, heap_no); +} + +/** Release the explicit locks of a committing transaction, +and release possible other transactions waiting because of these locks. +@return whether the operation succeeded */ +TRANSACTIONAL_TARGET static bool lock_release_try(trx_t *trx) +{ + /* At this point, trx->lock.trx_locks cannot be modified by other + threads, because our transaction has been committed. + See the checks and assertions in lock_rec_create_low() and + lock_rec_add_to_queue(). + + The function lock_table_create() should never be invoked on behalf + of a transaction running in another thread. Also there, we will + assert that the current transaction be active. */ + DBUG_ASSERT(trx->state == TRX_STATE_COMMITTED_IN_MEMORY); + DBUG_ASSERT(!trx->is_referenced()); + + bool all_released= true; +restart: + ulint count= 1000; + /* We will not attempt hardware lock elision (memory transaction) + here. Both lock_rec_dequeue_from_page() and lock_table_dequeue() + would likely lead to a memory transaction due to a system call, to + wake up a waiting transaction. */ + lock_sys.rd_lock(SRW_LOCK_CALL); + trx->mutex_lock(); + + /* Note: Anywhere else, trx->mutex is not held while acquiring + a lock table latch, but here we are following the opposite order. + To avoid deadlocks, we only try to acquire the lock table latches + but not keep waiting for them. 
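+
+  If a latch cannot be acquired immediately, the lock is skipped and
+  all_released is cleared, so that the caller lock_release() can retry
+  this pass; after five failed passes it falls back to acquiring
+  lock_sys.latch exclusively.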
*/ + + for (lock_t *lock= UT_LIST_GET_LAST(trx->lock.trx_locks); lock; ) + { + ut_ad(lock->trx == trx); + lock_t *prev= UT_LIST_GET_PREV(trx_locks, lock); + if (!lock->is_table()) + { + ut_ad(!lock->index->table->is_temporary()); + ut_ad(lock->mode() != LOCK_X || + lock->index->table->id >= DICT_HDR_FIRST_ID || + trx->dict_operation || trx->was_dict_operation); + auto &lock_hash= lock_sys.hash_get(lock->type_mode); + auto cell= lock_hash.cell_get(lock->un_member.rec_lock.page_id.fold()); + auto latch= lock_sys_t::hash_table::latch(cell); + if (!latch->try_acquire()) + all_released= false; + else + { + lock_rec_dequeue_from_page(lock, false); + latch->release(); + } + } + else + { + dict_table_t *table= lock->un_member.tab_lock.table; + ut_ad(!table->is_temporary()); + ut_ad(table->id >= DICT_HDR_FIRST_ID || + (lock->mode() != LOCK_IX && lock->mode() != LOCK_X) || + trx->dict_operation || trx->was_dict_operation); + if (!table->lock_mutex_trylock()) + all_released= false; + else + { + lock_table_dequeue(lock, false); + table->lock_mutex_unlock(); + } + } + + lock= all_released ? UT_LIST_GET_LAST(trx->lock.trx_locks) : prev; + if (!--count) + break; + } + + lock_sys.rd_unlock(); + trx->mutex_unlock(); + if (all_released && !count) + goto restart; + return all_released; +} + +/** Release the explicit locks of a committing transaction, +and release possible other transactions waiting because of these locks. */ +void lock_release(trx_t *trx) +{ +#ifdef UNIV_DEBUG + std::set<table_id_t> to_evict; + if (innodb_evict_tables_on_commit_debug && + !trx->is_recovered && !dict_sys.locked()) + for (const auto& p : trx->mod_tables) + if (!p.first->is_temporary()) + to_evict.emplace(p.first->id); +#endif + ulint count; + + for (count= 5; count--; ) + if (lock_release_try(trx)) + goto released; + + /* Fall back to acquiring lock_sys.latch in exclusive mode */ +restart: + count= 1000; + /* There is probably no point to try lock elision here; + in lock_release_try() it is different. */ + lock_sys.wr_lock(SRW_LOCK_CALL); + trx->mutex_lock(); + + while (lock_t *lock= UT_LIST_GET_LAST(trx->lock.trx_locks)) + { + ut_ad(lock->trx == trx); + if (!lock->is_table()) + { + ut_ad(!lock->index->table->is_temporary()); + ut_ad(lock->mode() != LOCK_X || + lock->index->table->id >= DICT_HDR_FIRST_ID || + trx->dict_operation || trx->was_dict_operation); + lock_rec_dequeue_from_page(lock, false); + } + else + { + ut_d(dict_table_t *table= lock->un_member.tab_lock.table); + ut_ad(!table->is_temporary()); + ut_ad(table->id >= DICT_HDR_FIRST_ID || + (lock->mode() != LOCK_IX && lock->mode() != LOCK_X) || + trx->dict_operation || trx->was_dict_operation); + lock_table_dequeue(lock, false); + } + + if (!--count) + break; + } + + lock_sys.wr_unlock(); + trx->mutex_unlock(); + if (!count) + goto restart; + +released: + if (UNIV_UNLIKELY(Deadlock::to_be_checked)) + { + mysql_mutex_lock(&lock_sys.wait_mutex); + lock_sys.deadlock_check(); + mysql_mutex_unlock(&lock_sys.wait_mutex); + } + + trx->lock.n_rec_locks= 0; + +#ifdef UNIV_DEBUG + if (to_evict.empty()) + return; + dict_sys.lock(SRW_LOCK_CALL); + LockMutexGuard g{SRW_LOCK_CALL}; + for (const table_id_t id : to_evict) + if (dict_table_t *table= dict_sys.find_table(id)) + if (!table->get_ref_count() && !UT_LIST_GET_LEN(table->locks)) + dict_sys.remove(table, true); + dict_sys.unlock(); +#endif +} + +/** Release the explicit locks of a committing transaction while +dict_sys.latch is exclusively locked, +and release possible other transactions waiting because of these locks. 
 */
+void lock_release_on_drop(trx_t *trx)
+{
+  ut_ad(lock_sys.is_writer());
+  ut_ad(trx->mutex_is_owner());
+  ut_ad(trx->dict_operation);
+
+  while (lock_t *lock= UT_LIST_GET_LAST(trx->lock.trx_locks))
+  {
+    ut_ad(lock->trx == trx);
+    if (!lock->is_table())
+    {
+      ut_ad(!lock->index->table->is_temporary());
+      ut_ad(lock->mode() != LOCK_X ||
+            lock->index->table->id >= DICT_HDR_FIRST_ID ||
+            trx->dict_operation);
+      lock_rec_dequeue_from_page(lock, false);
+    }
+    else
+    {
+      ut_d(dict_table_t *table= lock->un_member.tab_lock.table);
+      ut_ad(!table->is_temporary());
+      ut_ad(table->id >= DICT_HDR_FIRST_ID ||
+            (lock->mode() != LOCK_IX && lock->mode() != LOCK_X) ||
+            trx->dict_operation);
+      lock_table_dequeue(lock, false);
+    }
+  }
+}
+
+/** Reset lock bit for supremum and rebuild waiting queue.
+@param cell rec hash cell of in_lock
+@param lock the lock with supremum bit set */
+static void lock_rec_unlock_supremum(hash_cell_t &cell, lock_t *lock)
+{
+  ut_ad(lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM));
+#ifdef SAFE_MUTEX
+  ut_ad(!mysql_mutex_is_owner(&lock_sys.wait_mutex));
+#endif /* SAFE_MUTEX */
+  ut_ad(!lock->is_table());
+  ut_ad(lock_sys.is_writer() || lock->trx->mutex_is_owner());
+
+  lock_rec_reset_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM);
+
+  lock_t *first_lock= lock_sys_t::get_first(
+    cell, lock->un_member.rec_lock.page_id, PAGE_HEAP_NO_SUPREMUM);
+
+  lock_rec_rebuild_waiting_queue(
+#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
+    lock->trx,
+#endif /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
+    cell, first_lock, PAGE_HEAP_NO_SUPREMUM);
+}
+
+/** Release non-exclusive locks on XA PREPARE,
+and wake up possible other transactions waiting because of these locks.
+@param trx transaction in XA PREPARE state
+@return whether all locks were released */
+static bool lock_release_on_prepare_try(trx_t *trx)
+{
+  /* At this point, trx->lock.trx_locks can still be modified by other
+  threads to convert implicit exclusive locks into explicit ones.
+
+  The function lock_table_create() should never be invoked on behalf
+  of a transaction that is running in another thread. Also there, we
+  will assert that the current transaction be active. */
+  DBUG_ASSERT(trx->state == TRX_STATE_PREPARED);
+
+  bool all_released= true;
+  lock_sys.rd_lock(SRW_LOCK_CALL);
+  trx->mutex_lock();
+
+  /* Note: Normally, trx->mutex is not held while acquiring
+  a lock table latch, but here we are following the opposite order.
+  To avoid deadlocks, we only try to acquire the lock table latches
+  but not keep waiting for them.
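+
+  Only locks that a prepared transaction no longer needs are released:
+  granted exclusive non-gap record locks and LOCK_IX/LOCK_X table locks
+  survive until XA COMMIT or XA ROLLBACK, while LOCK_IS/LOCK_S table
+  locks, gap locks and supremum bits are dequeued.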
*/ + + for (lock_t *prev, *lock= UT_LIST_GET_LAST(trx->lock.trx_locks); lock; + lock= prev) + { + ut_ad(lock->trx == trx); + prev= UT_LIST_GET_PREV(trx_locks, lock); + if (!lock->is_table()) + { + ut_ad(!lock->index->table->is_temporary()); + bool supremum_bit = lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM); + bool rec_granted_exclusive_not_gap = + lock->is_rec_granted_exclusive_not_gap(); + if (!supremum_bit && rec_granted_exclusive_not_gap) + continue; + auto &lock_hash= lock_sys.hash_get(lock->type_mode); + auto cell= lock_hash.cell_get(lock->un_member.rec_lock.page_id.fold()); + auto latch= lock_sys_t::hash_table::latch(cell); + if (latch->try_acquire()) + { + if (!rec_granted_exclusive_not_gap) + lock_rec_dequeue_from_page(lock, false); + else if (supremum_bit) + lock_rec_unlock_supremum(*cell, lock); + latch->release(); + } + else + all_released= false; + } + else + { + dict_table_t *table= lock->un_member.tab_lock.table; + ut_ad(!table->is_temporary()); + switch (lock->mode()) { + case LOCK_IS: + case LOCK_S: + if (table->lock_mutex_trylock()) + { + lock_table_dequeue(lock, false); + table->lock_mutex_unlock(); + } + else + all_released= false; + break; + case LOCK_IX: + case LOCK_X: + ut_ad(table->id >= DICT_HDR_FIRST_ID || trx->dict_operation); + /* fall through */ + default: + break; + } + } + } + + lock_sys.rd_unlock(); + trx->mutex_unlock(); + return all_released; +} + +/** Release non-exclusive locks on XA PREPARE, +and release possible other transactions waiting because of these locks. */ +void lock_release_on_prepare(trx_t *trx) +{ + trx->set_skip_lock_inheritance(); + + for (ulint count= 5; count--; ) + if (lock_release_on_prepare_try(trx)) + return; + + LockMutexGuard g{SRW_LOCK_CALL}; + trx->mutex_lock(); + + for (lock_t *prev, *lock= UT_LIST_GET_LAST(trx->lock.trx_locks); lock; + lock= prev) + { + ut_ad(lock->trx == trx); + prev= UT_LIST_GET_PREV(trx_locks, lock); + if (!lock->is_table()) + { + ut_ad(!lock->index->table->is_temporary()); + if (!lock->is_rec_granted_exclusive_not_gap()) + lock_rec_dequeue_from_page(lock, false); + else if (lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM)) + { + auto &lock_hash= lock_sys.hash_get(lock->type_mode); + auto cell= lock_hash.cell_get(lock->un_member.rec_lock.page_id.fold()); + lock_rec_unlock_supremum(*cell, lock); + } + else + ut_ad(lock->trx->isolation_level > TRX_ISO_READ_COMMITTED || + /* Insert-intention lock is valid for supremum for isolation + level > TRX_ISO_READ_COMMITTED */ + lock->mode() == LOCK_X || + !lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM)); + } + else + { + ut_d(dict_table_t *table= lock->un_member.tab_lock.table); + ut_ad(!table->is_temporary()); + switch (lock->mode()) { + case LOCK_IS: + case LOCK_S: + lock_table_dequeue(lock, false); + break; + case LOCK_IX: + case LOCK_X: + ut_ad(table->id >= DICT_HDR_FIRST_ID || trx->dict_operation); + /* fall through */ + default: + break; + } + } + } + + trx->mutex_unlock(); +} + +/** Release locks on a table whose creation is being rolled back */ +ATTRIBUTE_COLD +void lock_release_on_rollback(trx_t *trx, dict_table_t *table) +{ + trx->mod_tables.erase(table); + + /* This is very rarely executed code, in the rare case that an + CREATE TABLE operation is being rolled back. Theoretically, + we might try to remove the locks in multiple memory transactions. 
*/ + lock_sys.wr_lock(SRW_LOCK_CALL); + trx->mutex_lock(); + + for (lock_t *next, *lock= UT_LIST_GET_FIRST(table->locks); lock; lock= next) + { + next= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); + ut_ad(lock->trx == trx); + UT_LIST_REMOVE(trx->lock.trx_locks, lock); + ut_list_remove(table->locks, lock, TableLockGetNode()); + } + + for (lock_t *p, *lock= UT_LIST_GET_LAST(trx->lock.trx_locks); lock; lock= p) + { + p= UT_LIST_GET_PREV(trx_locks, lock); + ut_ad(lock->trx == trx); + if (lock->is_table()) + ut_ad(lock->un_member.tab_lock.table != table); + else if (lock->index->table == table) + lock_rec_dequeue_from_page(lock, false); + } + + lock_sys.wr_unlock(); + trx->mutex_unlock(); +} + +/*********************************************************************//** +Removes table locks of the transaction on a table to be dropped. */ +static +void +lock_trx_table_locks_remove( +/*========================*/ + const lock_t* lock_to_remove) /*!< in: lock to remove */ +{ + trx_t* trx = lock_to_remove->trx; + + ut_ad(lock_to_remove->is_table()); + lock_sys.assert_locked(*lock_to_remove->un_member.tab_lock.table); + ut_ad(trx->mutex_is_owner()); + + for (lock_list::iterator it = trx->lock.table_locks.begin(), + end = trx->lock.table_locks.end(); it != end; ++it) { + const lock_t* lock = *it; + + ut_ad(!lock || trx == lock->trx); + ut_ad(!lock || lock->is_table()); + ut_ad(!lock || lock->un_member.tab_lock.table); + + if (lock == lock_to_remove) { + *it = NULL; + return; + } + } + + /* Lock must exist in the vector. */ + ut_error; +} + +/*===================== VALIDATION AND DEBUGGING ====================*/ + +/** Print info of a table lock. +@param[in,out] file output stream +@param[in] lock table lock */ +static +void +lock_table_print(FILE* file, const lock_t* lock) +{ + lock_sys.assert_locked(); + ut_a(lock->is_table()); + + fputs("TABLE LOCK table ", file); + ut_print_name(file, lock->trx, + lock->un_member.tab_lock.table->name.m_name); + fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id); + + switch (auto mode = lock->mode()) { + case LOCK_S: + fputs(" lock mode S", file); + break; + case LOCK_X: + ut_ad(lock->trx->id != 0); + fputs(" lock mode X", file); + break; + case LOCK_IS: + fputs(" lock mode IS", file); + break; + case LOCK_IX: + ut_ad(lock->trx->id != 0); + fputs(" lock mode IX", file); + break; + case LOCK_AUTO_INC: + fputs(" lock mode AUTO-INC", file); + break; + default: + fprintf(file, " unknown lock mode %u", mode); + } + + if (lock->is_waiting()) { + fputs(" waiting", file); + } + + putc('\n', file); +} + +/** Pretty-print a record lock. 
+@param[in,out]	file	output stream
+@param[in]	lock	record lock
+@param[in,out]	mtr	mini-transaction for accessing the record */
+static void lock_rec_print(FILE* file, const lock_t* lock, mtr_t& mtr)
+{
+	ut_ad(!lock->is_table());
+
+	const page_id_t page_id{lock->un_member.rec_lock.page_id};
+	ut_d(lock_sys.hash_get(lock->type_mode).assert_locked(page_id));
+
+	fprintf(file, "RECORD LOCKS space id %u page no %u n bits " ULINTPF
+		" index %s of table ",
+		page_id.space(), page_id.page_no(),
+		lock_rec_get_n_bits(lock),
+		lock->index->name());
+	ut_print_name(file, lock->trx, lock->index->table->name.m_name);
+	fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
+
+	switch (lock->mode()) {
+	case LOCK_S:
+		fputs(" lock mode S", file);
+		break;
+	case LOCK_X:
+		fputs(" lock_mode X", file);
+		break;
+	default:
+		ut_error;
+	}
+
+	if (lock->is_gap()) {
+		fputs(" locks gap before rec", file);
+	}
+
+	if (lock->is_record_not_gap()) {
+		fputs(" locks rec but not gap", file);
+	}
+
+	if (lock->is_insert_intention()) {
+		fputs(" insert intention", file);
+	}
+
+	if (lock->is_waiting()) {
+		fputs(" waiting", file);
+	}
+
+	putc('\n', file);
+
+	mem_heap_t*		heap		= NULL;
+	rec_offs		offsets_[REC_OFFS_NORMAL_SIZE];
+	rec_offs*		offsets		= offsets_;
+	rec_offs_init(offsets_);
+
+	mtr.start();
+	const buf_block_t* block = buf_page_try_get(page_id, &mtr);
+
+	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
+
+		if (!lock_rec_get_nth_bit(lock, i)) {
+			continue;
+		}
+
+		fprintf(file, "Record lock, heap no %lu", (ulong) i);
+
+		if (block) {
+			ut_ad(page_is_leaf(block->page.frame));
+			const rec_t*	rec;
+
+			rec = page_find_rec_with_heap_no(
+				buf_block_get_frame(block), i);
+			ut_ad(!page_rec_is_metadata(rec));
+
+			offsets = rec_get_offsets(
+				rec, lock->index, offsets,
+				lock->index->n_core_fields,
+				ULINT_UNDEFINED, &heap);
+
+			putc(' ', file);
+			rec_print_new(file, rec, offsets);
+		}
+
+		putc('\n', file);
+	}
+
+	mtr.commit();
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+}
+
+#ifdef UNIV_DEBUG
+/* Print the number of lock structs from lock_print_info_summary() only
+in non-production builds for performance reasons, see
+http://bugs.mysql.com/36942 */
+#define PRINT_NUM_OF_LOCK_STRUCTS
+#endif /* UNIV_DEBUG */
+
+#ifdef PRINT_NUM_OF_LOCK_STRUCTS
+/*********************************************************************//**
+Calculates the number of record lock structs in the record lock hash table.
+@return number of record locks */
+TRANSACTIONAL_TARGET
+static ulint lock_get_n_rec_locks()
+{
+	ulint	n_locks	= 0;
+	ulint	i;
+
+	lock_sys.assert_locked();
+
+	for (i = 0; i < lock_sys.rec_hash.n_cells; i++) {
+		const lock_t*	lock;
+
+		for (lock = static_cast<const lock_t*>(
+			     HASH_GET_FIRST(&lock_sys.rec_hash, i));
+		     lock != 0;
+		     lock = static_cast<const lock_t*>(
+				HASH_GET_NEXT(hash, lock))) {
+
+			n_locks++;
+		}
+	}
+
+	return(n_locks);
+}
+#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
+
+/*********************************************************************//**
+Prints info of locks for all transactions.
+@return FALSE if not able to acquire lock_sys.latch (and display info) */
+ibool
+lock_print_info_summary(
+/*====================*/
+	FILE*	file,	/*!< in: file where to print */
+	ibool	nowait)	/*!< in: whether to skip waiting for
+			lock_sys.latch */
+{
+	/* Here, lock elision does not make sense, because
+	for the output we are going to invoke system calls,
+	which would interrupt a memory transaction.
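+
+	(A memory transaction here means hardware lock elision of the
+	latch; the fputs()/fprintf() calls below enter the kernel, which
+	would abort such a transaction, so the latch is taken normally.)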
*/ + if (!nowait) { + lock_sys.wr_lock(SRW_LOCK_CALL); + } else if (!lock_sys.wr_lock_try()) { + fputs("FAIL TO OBTAIN LOCK MUTEX," + " SKIP LOCK INFO PRINTING\n", file); + return(FALSE); + } + + if (lock_sys.deadlocks) { + fputs("------------------------\n" + "LATEST DETECTED DEADLOCK\n" + "------------------------\n", file); + + if (!srv_read_only_mode) { + ut_copy_file(file, lock_latest_err_file); + } + } + + fputs("------------\n" + "TRANSACTIONS\n" + "------------\n", file); + + fprintf(file, "Trx id counter " TRX_ID_FMT "\n", + trx_sys.get_max_trx_id()); + + fprintf(file, + "Purge done for trx's n:o < " TRX_ID_FMT + " undo n:o < " TRX_ID_FMT " state: %s\n" + "History list length %zu\n", + purge_sys.tail.trx_no, + purge_sys.tail.undo_no, + purge_sys.enabled() + ? (purge_sys.running() ? "running" + : purge_sys.paused() ? "stopped" : "running but idle") + : "disabled", + trx_sys.history_size_approx()); + +#ifdef PRINT_NUM_OF_LOCK_STRUCTS + fprintf(file, + "Total number of lock structs in row lock hash table %lu\n", + (ulong) lock_get_n_rec_locks()); +#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ + return(TRUE); +} + +/** Prints transaction lock wait and MVCC state. +@param[in,out] file file where to print +@param[in] trx transaction +@param[in] now current my_hrtime_coarse() */ +void lock_trx_print_wait_and_mvcc_state(FILE *file, const trx_t *trx, + my_hrtime_t now) +{ + fprintf(file, "---"); + + trx_print_latched(file, trx, 600); + trx->read_view.print_limits(file); + + if (const lock_t* wait_lock = trx->lock.wait_lock) { + const my_hrtime_t suspend_time= trx->lock.suspend_time; + fprintf(file, + "------- TRX HAS BEEN WAITING %llu ns" + " FOR THIS LOCK TO BE GRANTED:\n", + now.val - suspend_time.val); + + if (!wait_lock->is_table()) { + mtr_t mtr; + lock_rec_print(file, wait_lock, mtr); + } else { + lock_table_print(file, wait_lock); + } + + fprintf(file, "------------------\n"); + } +} + +/*********************************************************************//** +Prints info of locks for a transaction. */ +static +void +lock_trx_print_locks( +/*=================*/ + FILE* file, /*!< in/out: File to write */ + const trx_t* trx) /*!< in: current transaction */ +{ + mtr_t mtr; + uint32_t i= 0; + /* Iterate over the transaction's locks. */ + lock_sys.assert_locked(); + for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); + lock != NULL; + lock = UT_LIST_GET_NEXT(trx_locks, lock)) { + if (!lock->is_table()) { + lock_rec_print(file, lock, mtr); + } else { + lock_table_print(file, lock); + } + + if (++i == 10) { + + fprintf(file, + "10 LOCKS PRINTED FOR THIS TRX:" + " SUPPRESSING FURTHER PRINTS\n"); + + break; + } + } +} + +/** Functor to display all transactions */ +struct lock_print_info +{ + lock_print_info(FILE* file, my_hrtime_t now) : + file(file), now(now), + purge_trx(purge_sys.query ? purge_sys.query->trx : nullptr) + {} + + void operator()(const trx_t &trx) const + { + if (UNIV_UNLIKELY(&trx == purge_trx)) + return; + lock_trx_print_wait_and_mvcc_state(file, &trx, now); + + if (trx.will_lock && srv_print_innodb_lock_monitor) + lock_trx_print_locks(file, &trx); + } + + FILE* const file; + const my_hrtime_t now; + const trx_t* const purge_trx; +}; + +/*********************************************************************//** +Prints info of locks for each transaction. This function will release +lock_sys.latch, which the caller must be holding in exclusive mode. 
 */
+void
+lock_print_info_all_transactions(
+/*=============================*/
+	FILE*	file)	/*!< in/out: file where to print */
+{
+	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
+
+	trx_sys.trx_list.for_each(lock_print_info(file, my_hrtime_coarse()));
+	lock_sys.wr_unlock();
+
+	ut_d(lock_validate());
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Find the lock in the trx_t::trx_lock_t::table_locks vector.
+@return true if found */
+static
+bool
+lock_trx_table_locks_find(
+/*======================*/
+	trx_t*		trx,		/*!< in: trx to validate */
+	const lock_t*	find_lock)	/*!< in: lock to find */
+{
+	bool		found = false;
+
+	ut_ad(trx->mutex_is_owner());
+
+	for (lock_list::const_iterator it = trx->lock.table_locks.begin(),
+	     end = trx->lock.table_locks.end(); it != end; ++it) {
+
+		const lock_t*	lock = *it;
+
+		if (lock == NULL) {
+
+			continue;
+
+		} else if (lock == find_lock) {
+
+			/* Can't be duplicates. */
+			ut_a(!found);
+			found = true;
+		}
+
+		ut_a(trx == lock->trx);
+		ut_a(lock->is_table());
+		ut_a(lock->un_member.tab_lock.table != NULL);
+	}
+
+	return(found);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a table.
+@return TRUE if ok */
+static
+ibool
+lock_table_queue_validate(
+/*======================*/
+	const dict_table_t*	table)	/*!< in: table */
+{
+	const lock_t*	lock;
+
+	lock_sys.assert_locked(*table);
+
+	for (lock = UT_LIST_GET_FIRST(table->locks);
+	     lock != NULL;
+	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
+
+		/* lock->trx->state cannot change from or to NOT_STARTED
+		while we are holding the lock_sys.latch. It may change
+		from ACTIVE or PREPARED to PREPARED or COMMITTED. */
+		lock->trx->mutex_lock();
+		check_trx_state(lock->trx);
+
+		if (lock->trx->state == TRX_STATE_COMMITTED_IN_MEMORY) {
+		} else if (!lock->is_waiting()) {
+			ut_a(!lock_table_other_has_incompatible(
+				     lock->trx, 0, table,
+				     lock->mode()));
+		} else {
+			ut_a(lock_table_has_to_wait_in_queue(lock));
+		}
+
+		ut_a(lock_trx_table_locks_find(lock->trx, lock));
+		lock->trx->mutex_unlock();
+	}
+
+	return(TRUE);
+}
+
+/*********************************************************************//**
+Validates the lock queue on a single record.
+@return TRUE if ok */
+static
+bool
+lock_rec_queue_validate(
+/*====================*/
+	bool			locked_lock_trx_sys,
+					/*!< in: if the caller holds
+					both the lock_sys.latch and
+					trx_sys_t->lock.
*/ + const page_id_t id, /*!< in: page identifier */ + const rec_t* rec, /*!< in: record to look at */ + const dict_index_t* index, /*!< in: index, or NULL if not known */ + const rec_offs* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + const lock_t* lock; + ulint heap_no; + + ut_a(rec); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!index || dict_index_is_clust(index) + || !dict_index_is_online_ddl(index)); + + heap_no = page_rec_get_heap_no(rec); + + if (!locked_lock_trx_sys) { + lock_sys.wr_lock(SRW_LOCK_CALL); + } + + hash_cell_t &cell= *lock_sys.rec_hash.cell_get(id.fold()); + lock_sys.assert_locked(cell); + + if (!page_rec_is_user_rec(rec)) { + + for (lock = lock_sys_t::get_first(cell, id, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + + ut_ad(!index || lock->index == index); + + lock->trx->mutex_lock(); + ut_ad(!lock->trx->read_only + || !lock->trx->is_autocommit_non_locking()); + ut_ad(trx_state_eq(lock->trx, + TRX_STATE_COMMITTED_IN_MEMORY) + || !lock->is_waiting() + || lock_rec_has_to_wait_in_queue(cell, lock)); + lock->trx->mutex_unlock(); + } + +func_exit: + if (!locked_lock_trx_sys) { + lock_sys.wr_unlock(); + } + + return true; + } + + ut_ad(page_rec_is_leaf(rec)); + + const trx_id_t impl_trx_id = index && index->is_primary() + ? lock_clust_rec_some_has_impl(rec, index, offsets) + : 0; + + if (trx_t *impl_trx = impl_trx_id + ? trx_sys.find(current_trx(), impl_trx_id, false) + : 0) { + /* impl_trx could have been committed before we + acquire its mutex, but not thereafter. */ + + impl_trx->mutex_lock(); + ut_ad(impl_trx->state != TRX_STATE_NOT_STARTED); + if (impl_trx->state == TRX_STATE_COMMITTED_IN_MEMORY) { + } else if (const lock_t* other_lock + = lock_rec_other_has_expl_req( + LOCK_S, cell, id, true, heap_no, + impl_trx)) { + /* The impl_trx is holding an implicit lock on the + given record 'rec'. So there cannot be another + explicit granted lock. Also, there can be another + explicit waiting lock only if the impl_trx has an + explicit granted lock. */ + +#ifdef WITH_WSREP + /** Galera record locking rules: + * If there is no other record lock to the same record, we may grant + the lock request. + * If there is other record lock but this requested record lock is + compatible, we may grant the lock request. + * If there is other record lock and it is not compatible with + requested lock, all normal transactions must wait. + * BF (brute force) additional exceptions : + ** If BF already holds record lock for requested record, we may + grant new record lock even if there is conflicting record lock(s) + waiting on a queue. + ** If conflicting transaction holds requested record lock, + we will cancel this record lock and select conflicting transaction + for BF abort or kill victim. + ** If conflicting transaction is waiting for requested record lock + we will cancel this wait and select conflicting transaction + for BF abort or kill victim. 
+ ** There should not be two BF transactions waiting for same record lock + */ + if (other_lock->trx->is_wsrep() && !other_lock->is_waiting()) { + wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + + if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + cell, id, heap_no, + impl_trx)) { + ib::info() << "WSREP impl BF lock conflict"; + } + } else +#endif /* WITH_WSREP */ + { + ut_ad(other_lock->is_waiting()); + ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + cell, id, heap_no, + impl_trx)); + } + } + + impl_trx->mutex_unlock(); + } + + for (lock = lock_sys_t::get_first(cell, id, heap_no); + lock != NULL; + lock = lock_rec_get_next_const(heap_no, lock)) { + ut_ad(!lock->trx->read_only + || !lock->trx->is_autocommit_non_locking()); + ut_ad(!page_rec_is_metadata(rec)); + + if (index) { + ut_a(lock->index == index); + } + + if (lock->is_waiting()) { + ut_a(lock->is_gap() + || lock_rec_has_to_wait_in_queue(cell, lock)); + } else if (!lock->is_gap()) { + const lock_mode mode = lock->mode() == LOCK_S + ? LOCK_X : LOCK_S; + + const lock_t* other_lock + = lock_rec_other_has_expl_req( + mode, cell, id, false, heap_no, + lock->trx); +#ifdef WITH_WSREP + if (UNIV_UNLIKELY(other_lock && lock->trx->is_wsrep())) { + /* Only BF transaction may be granted + lock before other conflicting lock + request. */ + if (!wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) + && !wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)) { + /* If no BF, this case is a bug. */ + wsrep_report_bf_lock_wait(lock->trx->mysql_thd, lock->trx->id); + wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); + ut_error; + } + } else +#endif /* WITH_WSREP */ + ut_ad(!other_lock); + } + } + + goto func_exit; +} + +/** Validate the record lock queues on a page. +@param block buffer pool block +@param latched whether the tablespace latch may be held +@return true if ok */ +static bool lock_rec_validate_page(const buf_block_t *block, bool latched) +{ + const lock_t* lock; + const rec_t* rec; + ulint nth_lock = 0; + ulint nth_bit = 0; + ulint i; + mem_heap_t* heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs_init(offsets_); + + const page_id_t id{block->page.id()}; + + LockGuard g{lock_sys.rec_hash, id}; +loop: + lock = lock_sys_t::get_first(g.cell(), id); + + if (!lock) { + goto function_exit; + } + + DBUG_ASSERT(!block->page.is_freed()); + + for (i = 0; i < nth_lock; i++) { + + lock = lock_rec_get_next_on_page_const(lock); + + if (!lock) { + goto function_exit; + } + } + + ut_ad(!lock->trx->read_only + || !lock->trx->is_autocommit_non_locking()); + + /* Only validate the record queues when this thread is not + holding a tablespace latch. */ + if (!latched) + for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { + bool locked = lock_rec_get_nth_bit(lock, i); + if (locked || i == PAGE_HEAP_NO_SUPREMUM) { + + rec = page_find_rec_with_heap_no(block->page.frame, i); + ut_a(rec); + ut_ad(!locked || page_rec_is_leaf(rec)); + + /* If this thread is holding the file space + latch (fil_space_t::latch), the following + check WILL break the latching order and may + cause a deadlock of threads. 
*/ + + if (locked) { + offsets = rec_get_offsets(rec, lock->index, + offsets, lock->index->n_core_fields, + ULINT_UNDEFINED, &heap); + lock_rec_queue_validate(true, id, rec, + lock->index, offsets); + } + + nth_bit = i + 1; + + goto loop; + } + } + + nth_bit = 0; + nth_lock++; + + goto loop; + +function_exit: + if (heap != NULL) { + mem_heap_free(heap); + } + return(TRUE); +} + +/*********************************************************************//** +Validate record locks up to a limit. +@return lock at limit or NULL if no more locks in the hash bucket */ +static MY_ATTRIBUTE((warn_unused_result)) +const lock_t* +lock_rec_validate( +/*==============*/ + ulint start, /*!< in: lock_sys.rec_hash + bucket */ + page_id_t* limit) /*!< in/out: upper limit of + (space, page_no) */ +{ + lock_sys.assert_locked(); + + for (const lock_t* lock = static_cast<const lock_t*>( + HASH_GET_FIRST(&lock_sys.rec_hash, start)); + lock != NULL; + lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) { + + ut_ad(!lock->trx->read_only + || !lock->trx->is_autocommit_non_locking()); + ut_ad(!lock->is_table()); + + page_id_t current(lock->un_member.rec_lock.page_id); + + if (current > *limit) { + *limit = current + 1; + return(lock); + } + } + + return(0); +} + +/*********************************************************************//** +Validate a record lock's block */ +static void lock_rec_block_validate(const page_id_t page_id) +{ + /* The lock and the block that it is referring to may be freed at + this point. */ + + buf_block_t* block; + mtr_t mtr; + + /* Transactional locks should never refer to dropped + tablespaces, because all DDL operations that would drop or + discard or rebuild a tablespace do hold an exclusive table + lock, which would conflict with any locks referring to the + tablespace from other transactions. */ + if (fil_space_t* space = fil_space_t::get(page_id.space())) { + dberr_t err = DB_SUCCESS; + mtr_start(&mtr); + + block = buf_page_get_gen( + page_id, + space->zip_size(), + RW_S_LATCH, NULL, + BUF_GET_POSSIBLY_FREED, + &mtr, &err); + + ut_ad(!block + || lock_rec_validate_page(block, space->is_latched())); + + mtr_commit(&mtr); + + space->release(); + } +} + +static my_bool lock_validate_table_locks(rw_trx_hash_element_t *element, void*) +{ + lock_sys.assert_locked(); + element->mutex.wr_lock(); + if (element->trx) + { + check_trx_state(element->trx); + for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks); + lock != NULL; + lock= UT_LIST_GET_NEXT(trx_locks, lock)) + if (lock->is_table()) + lock_table_queue_validate(lock->un_member.tab_lock.table); + } + element->mutex.wr_unlock(); + return 0; +} + + +/** Validate the transactional locks. */ +static void lock_validate() +{ + std::set<page_id_t> pages; + { + LockMutexGuard g{SRW_LOCK_CALL}; + /* Validate table locks */ + trx_sys.rw_trx_hash.iterate(lock_validate_table_locks); + + for (ulint i= 0; i < lock_sys.rec_hash.n_cells; i++) + { + page_id_t limit{0, 0}; + while (const lock_t *lock= lock_rec_validate(i, &limit)) + { + if (lock_rec_find_set_bit(lock) == ULINT_UNDEFINED) + /* The lock bitmap is empty; ignore it. 
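+        (A bitmap can become empty when, for example, lock_rec_unlock()
+        has reset the only set bit while the lock struct itself still
+        remains in the hash queue.)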
*/ + continue; + pages.insert(lock->un_member.rec_lock.page_id); + } + } + } + + for (page_id_t page_id : pages) + lock_rec_block_validate(page_id); +} +#endif /* UNIV_DEBUG */ +/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/ + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate insert of +a record. If they do, first tests if the query thread should anyway +be suspended for some reason; if not, then puts the transaction and +the query thread to the lock wait state and inserts a waiting request +for a gap x-lock to the lock queue. +@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ +TRANSACTIONAL_TARGET +dberr_t +lock_rec_insert_check_and_lock( +/*===========================*/ + const rec_t* rec, /*!< in: record after which to insert */ + buf_block_t* block, /*!< in/out: buffer block of rec */ + dict_index_t* index, /*!< in: index */ + que_thr_t* thr, /*!< in: query thread */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + bool* inherit)/*!< out: set to true if the new + inserted record maybe should inherit + LOCK_GAP type locks from the successor + record */ +{ + ut_ad(block->page.frame == page_align(rec)); + ut_ad(mtr->is_named_space(index->table->space)); + ut_ad(page_is_leaf(block->page.frame)); + ut_ad(!index->table->is_temporary()); + + const rec_t *next_rec= page_rec_get_next_const(rec); + if (UNIV_UNLIKELY(!next_rec || rec_is_metadata(next_rec, *index))) + return DB_CORRUPTION; + + dberr_t err= DB_SUCCESS; + bool inherit_in= *inherit; + trx_t *trx= thr_get_trx(thr); + ulint heap_no= page_rec_get_heap_no(next_rec); + const page_id_t id{block->page.id()}; + + { + LockGuard g{lock_sys.rec_hash, id}; + /* Because this code is invoked for a running transaction by + the thread that is serving the transaction, it is not necessary + to hold trx->mutex here. */ + + /* When inserting a record into an index, the table must be at + least IX-locked. When we are building an index, we would pass + BTR_NO_LOCKING_FLAG and skip the locking altogether. */ + ut_ad(lock_table_has(trx, index->table, LOCK_IX)); + + *inherit= lock_sys_t::get_first(g.cell(), id, heap_no); + + if (*inherit) + { + /* Spatial index does not use GAP lock protection. It uses + "predicate lock" to protect the "range" */ + if (index->is_spatial()) + return DB_SUCCESS; + + /* If another transaction has an explicit lock request which locks + the gap, waiting or granted, on the successor, the insert has to wait. + + An exception is the case where the lock by the another transaction + is a gap type lock which it placed to wait for its turn to insert. We + do not consider that kind of a lock conflicting with our insert. This + eliminates an unnecessary deadlock which resulted when 2 transactions + had to wait for their insert. Both had waiting gap type lock requests + on the successor, which produced an unnecessary deadlock. 
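+
+    A worked example: T1 and T2 each hold a waiting
+    LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION request on the same
+    successor record. If such waiting gap locks conflicted with each
+    other, each insert would wait for the other and deadlock; treating
+    them as non-conflicting lets both inserts proceed once the original
+    gap lock is released.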
*/ + const unsigned type_mode= LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION; + + if (lock_t *c_lock= lock_rec_other_has_conflicting(type_mode, + g.cell(), id, + heap_no, trx)) + { + trx->mutex_lock(); + err= lock_rec_enqueue_waiting(c_lock, type_mode, id, block->page.frame, + heap_no, index, thr, nullptr); + trx->mutex_unlock(); + } + } + } + + switch (err) { + case DB_SUCCESS_LOCKED_REC: + err = DB_SUCCESS; + /* fall through */ + case DB_SUCCESS: + if (!inherit_in || index->is_clust()) + break; + /* Update the page max trx id field */ + page_update_max_trx_id(block, buf_block_get_page_zip(block), trx->id, mtr); + default: + /* We only care about the two return values. */ + break; + } + +#ifdef UNIV_DEBUG + { + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + const rec_offs *offsets; + rec_offs_init(offsets_); + + offsets= rec_get_offsets(next_rec, index, offsets_, index->n_core_fields, + ULINT_UNDEFINED, &heap); + + ut_ad(lock_rec_queue_validate(false, id, next_rec, index, offsets)); + + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ + + return err; +} + +/*********************************************************************//** +Creates an explicit record lock for a running transaction that currently only +has an implicit lock on the record. The transaction instance must have a +reference count > 0 so that it can't be committed and freed before this +function has completed. */ +static +bool +lock_rec_convert_impl_to_expl_for_trx( +/*==================================*/ + trx_t* trx, /*!< in/out: active transaction */ + const page_id_t id, /*!< in: page identifier */ + const rec_t* rec, /*!< in: user record on page */ + dict_index_t* index) /*!< in: index of record */ +{ + if (!trx) + return false; + + ut_ad(trx->is_referenced()); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!rec_is_metadata(rec, *index)); + + DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx"); + ulint heap_no= page_rec_get_heap_no(rec); + + { + LockGuard g{lock_sys.rec_hash, id}; + trx->mutex_lock(); + ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + + if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) && + !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no, + trx)) + lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, + page_align(rec), heap_no, index, trx, true); + } + + trx->mutex_unlock(); + trx->release_reference(); + + DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx"); + return false; +} + + +#ifdef UNIV_DEBUG +struct lock_rec_other_trx_holds_expl_arg +{ + const ulint heap_no; + const hash_cell_t &cell; + const page_id_t id; + const trx_t &impl_trx; +}; + + +static my_bool lock_rec_other_trx_holds_expl_callback( + rw_trx_hash_element_t *element, + lock_rec_other_trx_holds_expl_arg *arg) +{ + element->mutex.wr_lock(); + if (element->trx) + { + element->trx->mutex_lock(); + ut_ad(element->trx->state != TRX_STATE_NOT_STARTED); + lock_t *expl_lock= element->trx->state == TRX_STATE_COMMITTED_IN_MEMORY + ? nullptr + : lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, + arg->cell, arg->id, arg->heap_no, element->trx); + /* + An explicit lock is held by trx other than the trx holding the implicit + lock. + */ + ut_ad(!expl_lock || expl_lock->trx == &arg->impl_trx); + element->trx->mutex_unlock(); + } + element->mutex.wr_unlock(); + return 0; +} + + +/** + Checks if some transaction, other than given trx_id, has an explicit + lock on the given rec. 
+ + FIXME: if the current transaction holds implicit lock from INSERT, a + subsequent locking read should not convert it to explicit. See also + MDEV-11215. + + @param caller_trx trx of current thread + @param[in] trx trx holding implicit lock on rec + @param[in] rec user record + @param[in] id page identifier +*/ +static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx, + const rec_t *rec, + const page_id_t id) +{ + if (trx) + { + ut_ad(!page_rec_is_metadata(rec)); + LockGuard g{lock_sys.rec_hash, id}; + ut_ad(trx->is_referenced()); + const trx_state_t state{trx->state}; + ut_ad(state != TRX_STATE_NOT_STARTED); + if (state == TRX_STATE_COMMITTED_IN_MEMORY) + /* The transaction was committed before we acquired LockGuard. */ + return; + lock_rec_other_trx_holds_expl_arg arg= + { page_rec_get_heap_no(rec), g.cell(), id, *trx }; + trx_sys.rw_trx_hash.iterate(caller_trx, + lock_rec_other_trx_holds_expl_callback, &arg); + } +} +#endif /* UNIV_DEBUG */ + +/** If an implicit x-lock exists on a record, convert it to an explicit one. + +Often, this is called by a transaction that is about to enter a lock wait +due to the lock conflict. Two explicit locks would be created: first the +exclusive lock on behalf of the lock-holder transaction in this function, +and then a wait request on behalf of caller_trx, in the calling function. + +This may also be called by the same transaction that is already holding +an implicit exclusive lock on the record. In this case, no explicit lock +should be created. + +@tparam is_primary whether the index is the primary key +@param[in,out] caller_trx current transaction +@param[in] id index tree leaf page identifier +@param[in] rec record on the leaf page +@param[in] index the index of the record +@param[in] offsets rec_get_offsets(rec,index) +@return whether caller_trx already holds an exclusive lock on rec */ +template<bool is_primary> +static +bool +lock_rec_convert_impl_to_expl( + trx_t* caller_trx, + page_id_t id, + const rec_t* rec, + dict_index_t* index, + const rec_offs* offsets) +{ + trx_t* trx; + + lock_sys.assert_unlocked(); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!rec_is_metadata(rec, *index)); + ut_ad(index->is_primary() == is_primary); + + if (is_primary) { + trx_id_t trx_id; + + trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); + + if (trx_id == 0) { + return false; + } + if (UNIV_UNLIKELY(trx_id == caller_trx->id)) { + return true; + } + + trx = trx_sys.find(caller_trx, trx_id); + } else { + ut_ad(!dict_index_is_online_ddl(index)); + + trx = lock_sec_rec_some_has_impl(caller_trx, rec, index, + offsets); + if (trx == caller_trx) { + trx->release_reference(); + return true; + } + + ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec, id)); + } + + return lock_rec_convert_impl_to_expl_for_trx(trx, id, rec, index); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate modify (update, +delete mark, or delete unmark) of a clustered index record. If they do, +first tests if the query thread should anyway be suspended for some +reason; if not, then puts the transaction and the query thread to the +lock wait state and inserts a waiting request for a record x-lock to the +lock queue. 
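+
+An editorial usage sketch: before delete-marking a clustered index
+record, row operations call
+
+	err = lock_clust_rec_modify_check_and_lock(block, rec, index,
+						   offsets, thr);
+
+and typically handle DB_LOCK_WAIT by suspending in lock_wait() and
+retrying, while DB_DEADLOCK aborts the statement.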
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ +dberr_t +lock_clust_rec_modify_check_and_lock( +/*=================================*/ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: record which should be + modified */ + dict_index_t* index, /*!< in: clustered index */ + const rec_offs* offsets,/*!< in: rec_get_offsets(rec, index) */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(dict_index_is_clust(index)); + ut_ad(block->page.frame == page_align(rec)); + + ut_ad(!rec_is_metadata(rec, *index)); + ut_ad(!index->table->is_temporary()); + + heap_no = rec_offs_comp(offsets) + ? rec_get_heap_no_new(rec) + : rec_get_heap_no_old(rec); + + /* If a transaction has no explicit x-lock set on the record, set one + for it */ + + if (lock_rec_convert_impl_to_expl<true>(thr_get_trx(thr), + block->page.id(), + rec, index, offsets)) { + /* We already hold an implicit exclusive lock. */ + return DB_SUCCESS; + } + + err = lock_rec_lock(true, LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, thr); + + ut_ad(lock_rec_queue_validate(false, block->page.id(), + rec, index, offsets)); + + if (err == DB_SUCCESS_LOCKED_REC) { + err = DB_SUCCESS; + } + + return(err); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate modify (delete +mark or delete unmark) of a secondary index record. +@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ +dberr_t +lock_sec_rec_modify_check_and_lock( +/*===============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + buf_block_t* block, /*!< in/out: buffer block of rec */ + const rec_t* rec, /*!< in: record which should be + modified; NOTE: as this is a secondary + index, we always have to modify the + clustered index record first: see the + comment below */ + dict_index_t* index, /*!< in: secondary index */ + que_thr_t* thr, /*!< in: query thread + (can be NULL if BTR_NO_LOCKING_FLAG) */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + dberr_t err; + ulint heap_no; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG)); + ut_ad(block->page.frame == page_align(rec)); + ut_ad(mtr->is_named_space(index->table->space)); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!rec_is_metadata(rec, *index)); + + if (flags & BTR_NO_LOCKING_FLAG) { + + return(DB_SUCCESS); + } + ut_ad(!index->table->is_temporary()); + + heap_no = page_rec_get_heap_no(rec); + +#ifdef WITH_WSREP + trx_t *trx= thr_get_trx(thr); + /* If transaction scanning an unique secondary key is wsrep + high priority thread (brute force) this scanning may involve + GAP-locking in the index. As this locking happens also when + applying replication events in high priority applier threads, + there is a probability for lock conflicts between two wsrep + high priority threads. To avoid this GAP-locking we mark that + this transaction is using unique key scan here. */ + if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false)) + trx->wsrep = 3; +#endif /* WITH_WSREP */ + + /* Another transaction cannot have an implicit lock on the record, + because when we come here, we already have modified the clustered + index record, and this would not have been possible if another active + transaction had modified this secondary index record. 
*/ + + err = lock_rec_lock(true, LOCK_X | LOCK_REC_NOT_GAP, + block, heap_no, index, thr); + +#ifdef WITH_WSREP + if (trx->wsrep == 3) trx->wsrep = 1; +#endif /* WITH_WSREP */ + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + const rec_offs* offsets; + rec_offs_init(offsets_); + + offsets = rec_get_offsets(rec, index, offsets_, + index->n_core_fields, + ULINT_UNDEFINED, &heap); + + ut_ad(lock_rec_queue_validate( + false, block->page.id(), rec, index, offsets)); + + if (heap != NULL) { + mem_heap_free(heap); + } + } +#endif /* UNIV_DEBUG */ + + if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) { + /* Update the page max trx id field */ + /* It might not be necessary to do this if + err == DB_SUCCESS (no new lock created), + but it should not cost too much performance. */ + page_update_max_trx_id(block, + buf_block_get_page_zip(block), + thr_get_trx(thr)->id, mtr); + err = DB_SUCCESS; + } + + return(err); +} + +/*********************************************************************//** +Like lock_clust_rec_read_check_and_lock(), but reads a +secondary index record. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */ +dberr_t +lock_sec_rec_read_check_and_lock( +/*=============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: secondary index */ + const rec_offs* offsets,/*!< in: rec_get_offsets(rec, index) */ + lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + unsigned gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + + ut_ad(!dict_index_is_clust(index)); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(block->page.frame == page_align(rec)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(mode == LOCK_X || mode == LOCK_S); + + if ((flags & BTR_NO_LOCKING_FLAG) + || srv_read_only_mode + || index->table->is_temporary()) { + + return(DB_SUCCESS); + } + + ut_ad(!rec_is_metadata(rec, *index)); + + trx_t *trx = thr_get_trx(thr); + + if (lock_table_has(trx, index->table, mode)) { + return DB_SUCCESS; + } + + if (!page_rec_is_supremum(rec) + && lock_rec_convert_impl_to_expl<false>( + trx, block->page.id(), rec, index, offsets) + && gap_mode == LOCK_REC_NOT_GAP) { + /* We already hold an implicit exclusive lock. */ + return DB_SUCCESS; + } + +#ifdef WITH_WSREP + /* If transaction scanning an unique secondary key is wsrep + high priority thread (brute force) this scanning may involve + GAP-locking in the index. As this locking happens also when + applying replication events in high priority applier threads, + there is a probability for lock conflicts between two wsrep + high priority threads. To avoid this GAP-locking we mark that + this transaction is using unique key scan here. 
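+
+(Editorial aside, hypothetical code that is not part of this patch: the
+save/restore of trx->wsrep around the lock call below could equivalently
+be expressed as a small RAII guard, e.g.
+
+  struct wsrep_uk_scan_guard
+  {
+    trx_t *trx;
+    wsrep_uk_scan_guard(trx_t *t) : trx(t)
+    {
+      // Mark a brute-force thread as performing a unique key scan.
+      if (t->is_wsrep() && wsrep_thd_is_BF(t->mysql_thd, false))
+        t->wsrep= 3;
+    }
+    ~wsrep_uk_scan_guard()
+    {
+      // Restore the normal wsrep mode when leaving the scope.
+      if (trx->wsrep == 3)
+        trx->wsrep= 1;
+    }
+  };
+
+so that the flag is restored on every exit path.)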
*/ + if (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false)) + trx->wsrep = 3; +#endif /* WITH_WSREP */ + + err = lock_rec_lock(false, gap_mode | mode, + block, page_rec_get_heap_no(rec), index, thr); + +#ifdef WITH_WSREP + if (trx->wsrep == 3) trx->wsrep = 1; +#endif /* WITH_WSREP */ + + ut_ad(lock_rec_queue_validate(false, block->page.id(), + rec, index, offsets)); + + DEBUG_SYNC_C("lock_sec_rec_read_check_and_lock_has_locked"); + + return(err); +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. +@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, or DB_DEADLOCK */ +dberr_t +lock_clust_rec_read_check_and_lock( +/*===============================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: clustered index */ + const rec_offs* offsets,/*!< in: rec_get_offsets(rec, index) */ + lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + unsigned gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + ut_ad(dict_index_is_clust(index)); + ut_ad(block->page.frame == page_align(rec)); + ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP + || gap_mode == LOCK_REC_NOT_GAP); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!rec_is_metadata(rec, *index)); + + if ((flags & BTR_NO_LOCKING_FLAG) + || srv_read_only_mode + || index->table->is_temporary()) { + + return(DB_SUCCESS); + } + + const page_id_t id{block->page.id()}; + + ulint heap_no = page_rec_get_heap_no(rec); + + trx_t *trx = thr_get_trx(thr); + if (!lock_table_has(trx, index->table, LOCK_X) + && heap_no != PAGE_HEAP_NO_SUPREMUM + && lock_rec_convert_impl_to_expl<true>(trx, id, + rec, index, offsets) + && gap_mode == LOCK_REC_NOT_GAP) { + /* We already hold an implicit exclusive lock. */ + return DB_SUCCESS; + } + + dberr_t err = lock_rec_lock(false, gap_mode | mode, + block, heap_no, index, thr); + + ut_ad(lock_rec_queue_validate(false, id, rec, index, offsets)); + + DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock"); + + return(err); +} +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. This is an alternative version of +lock_clust_rec_read_check_and_lock() that does not require the parameter +"offsets". 
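+
+As an editorial sketch (hypothetical call sites, not upstream code), the
+two variants are interchangeable when the caller has no offsets at hand:
+
+  lock_clust_rec_read_check_and_lock(0, block, rec, index, offsets,
+                                     LOCK_S, LOCK_ORDINARY, thr);
+  lock_clust_rec_read_check_and_lock_alt(0, block, rec, index,
+                                         LOCK_S, LOCK_ORDINARY, thr);
+
+The only difference is that this wrapper computes the offsets internally
+via rec_get_offsets() and maps DB_SUCCESS_LOCKED_REC to DB_SUCCESS.
+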
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ +dberr_t +lock_clust_rec_read_check_and_lock_alt( +/*===================================*/ + ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /*!< in: buffer block of rec */ + const rec_t* rec, /*!< in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /*!< in: clustered index */ + lock_mode mode, /*!< in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + unsigned gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr) /*!< in: query thread */ +{ + mem_heap_t* tmp_heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + dberr_t err; + rec_offs_init(offsets_); + + ut_ad(page_rec_is_leaf(rec)); + offsets = rec_get_offsets(rec, index, offsets, index->n_core_fields, + ULINT_UNDEFINED, &tmp_heap); + err = lock_clust_rec_read_check_and_lock(flags, block, rec, index, + offsets, mode, gap_mode, thr); + if (tmp_heap) { + mem_heap_free(tmp_heap); + } + + if (err == DB_SUCCESS_LOCKED_REC) { + err = DB_SUCCESS; + } + + return(err); +} + +/*******************************************************************//** +Check if a transaction holds any autoinc locks. +@return TRUE if the transaction holds any AUTOINC locks. */ +static +ibool +lock_trx_holds_autoinc_locks( +/*=========================*/ + const trx_t* trx) /*!< in: transaction */ +{ + ut_a(trx->autoinc_locks != NULL); + + return(!ib_vector_is_empty(trx->autoinc_locks)); +} + +/** Release all AUTO_INCREMENT locks of the transaction. */ +static void lock_release_autoinc_locks(trx_t *trx) +{ + { + LockMutexGuard g{SRW_LOCK_CALL}; + mysql_mutex_lock(&lock_sys.wait_mutex); + trx->mutex_lock(); + auto autoinc_locks= trx->autoinc_locks; + ut_a(autoinc_locks); + + /* We release the locks in the reverse order. This is to avoid + searching the vector for the element to delete at the lower level. + See (lock_table_remove_low()) for details. */ + while (ulint size= ib_vector_size(autoinc_locks)) + { + lock_t *lock= *static_cast<lock_t**> + (ib_vector_get(autoinc_locks, size - 1)); + ut_ad(lock->type_mode == (LOCK_AUTO_INC | LOCK_TABLE)); + lock_table_dequeue(lock, true); + lock_trx_table_locks_remove(lock); + } + } + mysql_mutex_unlock(&lock_sys.wait_mutex); + trx->mutex_unlock(); +} + +/** Cancel a waiting lock request and release possibly waiting transactions */ +template <bool from_deadlock= false, bool inner_trx_lock= true> +void lock_cancel_waiting_and_release(lock_t *lock) +{ + lock_sys.assert_locked(*lock); + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + trx_t *trx= lock->trx; + if (inner_trx_lock) + trx->mutex_lock(); + ut_d(const auto trx_state= trx->state); + ut_ad(trx_state == TRX_STATE_COMMITTED_IN_MEMORY || + trx_state == TRX_STATE_ACTIVE); + + if (!lock->is_table()) + lock_rec_dequeue_from_page(lock, true); + else + { + if (lock->type_mode == (LOCK_AUTO_INC | LOCK_TABLE)) + { + ut_ad(trx->autoinc_locks); + ib_vector_remove(trx->autoinc_locks, lock); + } + lock_table_dequeue(lock, true); + /* Remove the lock from table lock vector too. */ + lock_trx_table_locks_remove(lock); + } + + /* Reset the wait flag and the back pointer to lock in trx. 
*/ + lock_reset_lock_and_trx_wait(lock); + + lock_wait_end<from_deadlock>(trx); + + if (inner_trx_lock) + trx->mutex_unlock(); +} + +void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx) +{ + lock_sys.wr_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&lock_sys.wait_mutex); + if (lock_t *lock= trx->lock.wait_lock) + { + /* check if victim is still waiting */ + if (lock->is_waiting()) + lock_cancel_waiting_and_release(lock); + } + lock_sys.wr_unlock(); + mysql_mutex_unlock(&lock_sys.wait_mutex); +} + +#ifdef WITH_WSREP +void lock_sys_t::cancel_lock_wait_for_wsrep_bf_abort(trx_t *trx) +{ + lock_sys.assert_locked(); + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + ut_ad(trx->mutex_is_owner()); + ut_ad(trx->state == TRX_STATE_ACTIVE || trx->state == TRX_STATE_PREPARED); + trx->lock.set_wsrep_victim(); + if (lock_t *lock= trx->lock.wait_lock) + lock_cancel_waiting_and_release<false, false>(lock); +} +#endif /* WITH_WSREP */ + +/** Cancel a waiting lock request. +@tparam check_victim whether to check for DB_DEADLOCK +@param trx active transaction +@param lock waiting lock request +@retval DB_SUCCESS if no lock existed +@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set +@retval DB_LOCK_WAIT if the lock was canceled */ +template<bool check_victim> +dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) +{ + DEBUG_SYNC_C("lock_sys_t_cancel_enter"); + mysql_mutex_assert_owner(&lock_sys.wait_mutex); + ut_ad(trx->state == TRX_STATE_ACTIVE); + /* trx->lock.wait_lock may be changed by other threads as long as + we are not holding lock_sys.latch. + + So, trx->lock.wait_lock==lock does not necessarily hold, but both + pointers should be valid, because other threads cannot assign + trx->lock.wait_lock=nullptr (or invalidate *lock) while we are + holding lock_sys.wait_mutex. Also, the type of trx->lock.wait_lock + (record or table lock) cannot be changed by other threads. So, it is + safe to call lock->is_table() while not holding lock_sys.latch. If + we have to release and reacquire lock_sys.wait_mutex, we must reread + trx->lock.wait_lock. We must also reread trx->lock.wait_lock after + lock_sys.latch acquiring, as it can be changed to not-null in lock moving + functions even if we hold lock_sys.wait_mutex. */ + dberr_t err= DB_SUCCESS; + /* This would be too large for a memory transaction, except in the + DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). */ + if (lock->is_table()) + { + if (!lock_sys.rd_lock_try()) + { + mysql_mutex_unlock(&lock_sys.wait_mutex); + lock_sys.rd_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&lock_sys.wait_mutex); + lock= trx->lock.wait_lock; + /* Even if waiting lock was cancelled while lock_sys.wait_mutex was + unlocked, we need to return deadlock error if transaction was chosen + as deadlock victim to rollback it */ + if (check_victim && trx->lock.was_chosen_as_deadlock_victim) + err= DB_DEADLOCK; + else if (lock) + goto resolve_table_lock; + } + else + { + /* This function is invoked from the thread which executes the + transaction. Table locks are requested before record locks. Some other + transaction can't change trx->lock.wait_lock from table to record for the + current transaction at this point, because the current transaction has not + requested record locks yet. There is no need to move any table locks by + other threads. And trx->lock.wait_lock can't be set to null while we are + holding lock_sys.wait_mutex. That's why there is no need to reload + trx->lock.wait_lock here. 
*/
+    ut_ad(lock == trx->lock.wait_lock);
+resolve_table_lock:
+    dict_table_t *table= lock->un_member.tab_lock.table;
+    if (!table->lock_mutex_trylock())
+    {
+      /* The correct latching order is:
+      lock_sys.latch, table->lock_mutex_lock(), lock_sys.wait_mutex.
+      Thus, we must release lock_sys.wait_mutex for a blocking wait. */
+      mysql_mutex_unlock(&lock_sys.wait_mutex);
+      table->lock_mutex_lock();
+      mysql_mutex_lock(&lock_sys.wait_mutex);
+      /* Cache trx->lock.wait_lock under the corresponding latches. */
+      lock= trx->lock.wait_lock;
+      if (!lock)
+        goto retreat;
+      else if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
+      {
+        err= DB_DEADLOCK;
+        goto retreat;
+      }
+    }
+    else
+      /* Cache trx->lock.wait_lock under the corresponding latches if
+      it was not cached yet */
+      lock= trx->lock.wait_lock;
+    if (lock->is_waiting())
+      lock_cancel_waiting_and_release(lock);
+    /* Even if lock->is_waiting() did not hold above, we must return
+    DB_LOCK_WAIT, or otherwise optimistic parallel replication could
+    occasionally hang. Potentially affected tests:
+    rpl.rpl_parallel_optimistic
+    rpl.rpl_parallel_optimistic_nobinlog
+    rpl.rpl_parallel_optimistic_xa_lsu_off */
+    err= DB_LOCK_WAIT;
+retreat:
+    table->lock_mutex_unlock();
+    }
+    lock_sys.rd_unlock();
+  }
+  else
+  {
+    /* To prevent the record lock from being moved between pages
+    during a page split or merge, we must hold exclusive lock_sys.latch. */
+    if (!lock_sys.wr_lock_try())
+    {
+      mysql_mutex_unlock(&lock_sys.wait_mutex);
+      lock_sys.wr_lock(SRW_LOCK_CALL);
+      mysql_mutex_lock(&lock_sys.wait_mutex);
+      /* Cache trx->lock.wait_lock under the corresponding latches. */
+      lock= trx->lock.wait_lock;
+      /* Even if the waiting lock was cancelled while lock_sys.wait_mutex
+      was unlocked, we need to return a deadlock error if the transaction
+      was chosen as a deadlock victim, in order to roll it back */
+      if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
+        err= DB_DEADLOCK;
+      else if (lock)
+        goto resolve_record_lock;
+    }
+    else
+    {
+      /* Cache trx->lock.wait_lock under the corresponding latches if
+      it was not cached yet */
+      lock= trx->lock.wait_lock;
+resolve_record_lock:
+      if (lock->is_waiting())
+        lock_cancel_waiting_and_release(lock);
+      /* Even if lock->is_waiting() did not hold above, we must return
+      DB_LOCK_WAIT, or otherwise optimistic parallel replication could
+      occasionally hang. Potentially affected tests:
+      rpl.rpl_parallel_optimistic
+      rpl.rpl_parallel_optimistic_nobinlog
+      rpl.rpl_parallel_optimistic_xa_lsu_off */
+      err= DB_LOCK_WAIT;
+    }
+    lock_sys.wr_unlock();
+  }
+
+  return err;
+}
+
+template dberr_t lock_sys_t::cancel<false>(trx_t *, lock_t *);
+
+/*********************************************************************//**
+Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
+function should be called at the end of an SQL statement, by the
+connection thread that owns the transaction (trx->mysql_thd). */
+void
+lock_unlock_table_autoinc(
+/*======================*/
+  trx_t* trx) /*!< in/out: transaction */
+{
+  lock_sys.assert_unlocked();
+  ut_ad(!trx->mutex_is_owner());
+  ut_ad(!trx->lock.wait_lock);
+
+  /* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
+  but not COMMITTED transactions. */
+
+  ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
+        || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+
+  /* This function is invoked for a running transaction by the
+  thread that is serving the transaction. Therefore it is not
+  necessary to hold trx->mutex here. 
*/ + + if (lock_trx_holds_autoinc_locks(trx)) { + lock_release_autoinc_locks(trx); + } +} + +/** Handle a pending lock wait (DB_LOCK_WAIT) in a semi-consistent read +while holding a clustered index leaf page latch. + +@param trx transaction that is or was waiting for a lock +@retval DB_SUCCESS if the lock was granted +@retval DB_DEADLOCK if the transaction must be aborted due to a deadlock +@retval DB_LOCK_WAIT if a lock wait would be necessary; the pending + lock request was released */ +dberr_t lock_trx_handle_wait(trx_t *trx) +{ + DEBUG_SYNC_C("lock_trx_handle_wait_enter"); + if (trx->lock.was_chosen_as_deadlock_victim) + return DB_DEADLOCK; + DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check"); + /* trx->lock.was_chosen_as_deadlock_victim must always be set before + trx->lock.wait_lock if the transaction was chosen as deadlock victim, + the function must not return DB_SUCCESS if + trx->lock.was_chosen_as_deadlock_victim is set. */ + if (!trx->lock.wait_lock) + return trx->lock.was_chosen_as_deadlock_victim ? DB_DEADLOCK : DB_SUCCESS; + dberr_t err= DB_SUCCESS; + mysql_mutex_lock(&lock_sys.wait_mutex); + if (trx->lock.was_chosen_as_deadlock_victim) + err= DB_DEADLOCK; + /* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */ + else if (lock_t *wait_lock= trx->lock.wait_lock) + err= lock_sys_t::cancel<true>(trx, wait_lock); + lock_sys.deadlock_check(); + mysql_mutex_unlock(&lock_sys.wait_mutex); + return err; +} + +#ifdef UNIV_DEBUG +/** + Do an exhaustive check for any locks (table or rec) against the table. + + @param[in] table check if there are any locks held on records in this table + or on the table itself +*/ + +static my_bool lock_table_locks_lookup(rw_trx_hash_element_t *element, + const dict_table_t *table) +{ + lock_sys.assert_locked(); + element->mutex.wr_lock(); + if (element->trx) + { + element->trx->mutex_lock(); + check_trx_state(element->trx); + if (element->trx->state != TRX_STATE_COMMITTED_IN_MEMORY) + { + for (const lock_t *lock= UT_LIST_GET_FIRST(element->trx->lock.trx_locks); + lock != NULL; + lock= UT_LIST_GET_NEXT(trx_locks, lock)) + { + ut_ad(lock->trx == element->trx); + if (!lock->is_table()) + { + ut_ad(lock->index->online_status != ONLINE_INDEX_CREATION || + lock->index->is_primary()); + ut_ad(lock->index->table != table); + } + else + ut_ad(lock->un_member.tab_lock.table != table); + } + } + element->trx->mutex_unlock(); + } + element->mutex.wr_unlock(); + return 0; +} +#endif /* UNIV_DEBUG */ + +/** Check if there are any locks on a table. +@return true if table has either table or record locks. */ +TRANSACTIONAL_TARGET +bool lock_table_has_locks(dict_table_t *table) +{ + if (table->n_rec_locks) + return true; + ulint len; +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (xbegin()) + { + if (table->lock_mutex_is_locked()) + xabort(); + len= UT_LIST_GET_LEN(table->locks); + xend(); + } + else +#endif + { + table->lock_mutex_lock(); + len= UT_LIST_GET_LEN(table->locks); + table->lock_mutex_unlock(); + } + if (len) + return true; +#ifdef UNIV_DEBUG + { + LockMutexGuard g{SRW_LOCK_CALL}; + trx_sys.rw_trx_hash.iterate(lock_table_locks_lookup, + const_cast<const dict_table_t*>(table)); + } +#endif /* UNIV_DEBUG */ + return false; +} + +/*******************************************************************//** +Initialise the table lock list. 
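+
+(Editorial aside on lock_table_has_locks() above, not part of this patch:
+the fast path reads the lock list length inside a hardware memory
+transaction and falls back to the mutex when elision is unavailable. A
+minimal sketch of that pattern, with hypothetical names and assuming the
+same NO_ELISION/SUX_LOCK_GENERIC guards, is
+
+  ulint read_lock_list_len(dict_table_t *t)
+  {
+    ulint len;
+    if (xbegin())                      // start a hardware transaction
+    {
+      if (t->lock_mutex_is_locked())
+        xabort();                      // writer active: abort the elision
+      len= UT_LIST_GET_LEN(t->locks);
+      xend();                          // commit: len was read atomically
+    }
+    else
+    {
+      t->lock_mutex_lock();            // fallback under the mutex
+      len= UT_LIST_GET_LEN(t->locks);
+      t->lock_mutex_unlock();
+    }
+    return len;
+  }
+
+which mirrors the structure of the function above.)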
*/ +void +lock_table_lock_list_init( +/*======================*/ + table_lock_list_t* lock_list) /*!< List to initialise */ +{ + UT_LIST_INIT(*lock_list, &lock_table_t::locks); +} + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Check if the transaction holds any locks on the sys tables +or its records. +@return the strongest lock found on any sys table or 0 for none */ +const lock_t* +lock_trx_has_sys_table_locks( +/*=========================*/ + const trx_t* trx) /*!< in: transaction to check */ +{ + const lock_t* strongest_lock = 0; + lock_mode strongest = LOCK_NONE; + + LockMutexGuard g{SRW_LOCK_CALL}; + + const lock_list::const_iterator end = trx->lock.table_locks.end(); + lock_list::const_iterator it = trx->lock.table_locks.begin(); + + /* Find a valid mode. Note: ib_vector_size() can be 0. */ + + for (/* No op */; it != end; ++it) { + const lock_t* lock = *it; + + if (lock != NULL + && dict_is_sys_table(lock->un_member.tab_lock.table->id)) { + + strongest = lock->mode(); + ut_ad(strongest != LOCK_NONE); + strongest_lock = lock; + break; + } + } + + if (strongest == LOCK_NONE) { + return(NULL); + } + + for (/* No op */; it != end; ++it) { + const lock_t* lock = *it; + + if (lock == NULL) { + continue; + } + + ut_ad(trx == lock->trx); + ut_ad(lock->is_table()); + ut_ad(lock->un_member.tab_lock.table); + + lock_mode mode = lock->mode(); + + if (dict_is_sys_table(lock->un_member.tab_lock.table->id) + && lock_mode_stronger_or_eq(mode, strongest)) { + + strongest = mode; + strongest_lock = lock; + } + } + + return(strongest_lock); +} + +/** Check if the transaction holds an explicit exclusive lock on a record. +@param[in] trx transaction +@param[in] table table +@param[in] id leaf page identifier +@param[in] heap_no heap number identifying the record +@return whether an explicit X-lock is held */ +bool lock_trx_has_expl_x_lock(const trx_t &trx, const dict_table_t &table, + page_id_t id, ulint heap_no) +{ + ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM); + ut_ad(lock_table_has(&trx, &table, LOCK_IX)); + if (!lock_table_has(&trx, &table, LOCK_X)) + { + LockGuard g{lock_sys.rec_hash, id}; + ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + g.cell(), id, heap_no, &trx)); + } + return true; +} +#endif /* UNIV_DEBUG */ + +namespace Deadlock +{ + /** rewind(3) the file used for storing the latest detected deadlock and + print a heading message to stderr if printing of all deadlocks to stderr + is enabled. */ + static void start_print() + { + lock_sys.assert_locked(); + + rewind(lock_latest_err_file); + ut_print_timestamp(lock_latest_err_file); + + if (srv_print_all_deadlocks) + ib::info() << "Transactions deadlock detected," + " dumping detailed information."; + } + + /** Print a message to the deadlock file and possibly to stderr. + @param msg message to print */ + static void print(const char *msg) + { + fputs(msg, lock_latest_err_file); + if (srv_print_all_deadlocks) + ib::info() << msg; + } + + /** Print transaction data to the deadlock file and possibly to stderr. 
+  @param trx transaction */
+  static void print(const trx_t &trx)
+  {
+    lock_sys.assert_locked();
+
+    ulint n_rec_locks= trx.lock.n_rec_locks;
+    ulint n_trx_locks= UT_LIST_GET_LEN(trx.lock.trx_locks);
+    ulint heap_size= mem_heap_get_size(trx.lock.lock_heap);
+
+    trx_print_low(lock_latest_err_file, &trx, 3000,
+                  n_rec_locks, n_trx_locks, heap_size);
+
+    if (srv_print_all_deadlocks)
+      trx_print_low(stderr, &trx, 3000, n_rec_locks, n_trx_locks, heap_size);
+  }
+
+  /** Print lock data to the deadlock file and possibly to stderr.
+  @param lock record or table type lock */
+  static void print(const lock_t &lock)
+  {
+    lock_sys.assert_locked();
+
+    if (!lock.is_table())
+    {
+      mtr_t mtr;
+      lock_rec_print(lock_latest_err_file, &lock, mtr);
+
+      if (srv_print_all_deadlocks)
+        lock_rec_print(stderr, &lock, mtr);
+    }
+    else
+    {
+      lock_table_print(lock_latest_err_file, &lock);
+
+      if (srv_print_all_deadlocks)
+        lock_table_print(stderr, &lock);
+    }
+  }
+
+  ATTRIBUTE_COLD
+  /** Calculate a number used to compare deadlock victim candidates.
+Bit 62 is used to prefer transactions that did not modify non-transactional
+tables. Bits 1-61 are set to TRX_WEIGHT to prefer transactions with fewer
+locks and fewer modified rows. Bit 0 is used to prefer orig_trx in case of
+a tie.
+  @param trx Transaction
+  @return a 64-bit unsigned integer; the lower the value, the more preferred
+  trx is as a deadlock victim */
+  static undo_no_t calc_victim_weight(trx_t *trx, const trx_t *orig_trx)
+  {
+    const undo_no_t trx_weight= (trx != orig_trx) | (TRX_WEIGHT(trx) << 1) |
+      (trx->mysql_thd &&
+#ifdef WITH_WSREP
+       (thd_has_edited_nontrans_tables(trx->mysql_thd) ||
+        (trx->is_wsrep() && wsrep_thd_is_BF(trx->mysql_thd, false)))
+#else
+       thd_has_edited_nontrans_tables(trx->mysql_thd)
+#endif /* WITH_WSREP */
+       ? 1ULL << 62 : 0);
+    return trx_weight;
+  }
+
+  ATTRIBUTE_COLD
+  /** Report a deadlock (cycle in the waits-for graph).
+  @param trx transaction waiting for a lock in this thread
+  @param current_trx whether trx belongs to the current thread
+  @return the transaction to be rolled back (unless one was committed already)
+  @return nullptr if no deadlock */
+  static trx_t *report(trx_t *const trx, bool current_trx)
+  {
+    mysql_mutex_assert_owner(&lock_sys.wait_mutex);
+    ut_ad(xtest() || lock_sys.is_writer() == !current_trx);
+
+    /* Normally, trx should be a direct part of the deadlock
+    cycle. However, if innodb_deadlock_detect had been OFF in the
+    past, or if current_trx=false, trx may be waiting for a lock that
+    is held by a participant of a pre-existing deadlock, without being
+    part of the deadlock itself. That is, the path to the deadlock may be
+    P-shaped instead of O-shaped, with trx being at the foot of the P.
+
+    We will process the entire path leading to a cycle, and we will
+    choose the victim (to be aborted) among the cycle. */
+
+    static const char rollback_msg[]= "*** WE ROLL BACK TRANSACTION (%u)\n";
+    char buf[9 + sizeof rollback_msg];
+    trx_t *victim= nullptr;
+
+    /* Here, lock elision does not make sense, because
+    for the output we are going to invoke system calls,
+    which would interrupt a memory transaction. */
+    if (current_trx && !lock_sys.wr_lock_try())
+    {
+      mysql_mutex_unlock(&lock_sys.wait_mutex);
+      lock_sys.wr_lock(SRW_LOCK_CALL);
+      mysql_mutex_lock(&lock_sys.wait_mutex);
+    }
+
+    {
+      unsigned l= 1;
+      /* Now that we are holding lock_sys.wait_mutex again, check
+      whether a cycle still exists. */
+      trx_t *cycle= find_cycle(trx);
+      if (!cycle)
+        goto func_exit; /* One of the transactions was already aborted. 
*/ + + victim= cycle; + undo_no_t victim_weight= calc_victim_weight(victim, trx); + unsigned victim_pos= l; + for (trx_t *next= cycle;;) + { + next= next->lock.wait_trx; + l++; + const undo_no_t next_weight= calc_victim_weight(next, trx); +#ifdef HAVE_REPLICATION + const int pref= + thd_deadlock_victim_preference(victim->mysql_thd, next->mysql_thd); + /* Set bit 63 for any non-preferred victim to make such preference take + priority in the weight comparison. + -1 means victim is preferred. 1 means next is preferred. */ + undo_no_t victim_not_pref= (1ULL << 63) & (undo_no_t)(int64_t)(-pref); + undo_no_t next_not_pref= (1ULL << 63) & (undo_no_t)(int64_t)pref; +#else + undo_no_t victim_not_pref= 0; + undo_no_t next_not_pref= 0; +#endif + /* Single comparison to decide which of two transactions is preferred + as a deadlock victim. + - If thd_deadlock_victim_preference() returned non-zero, bit 63 + comparison will decide the preferred one. + - Else if exactly one of them modified non-transactional tables, + bit 62 will decide. + - Else the TRX_WEIGHT in bits 1-61 will decide, if not equal. + - Else, if one of them is the original trx, bit 0 will decide. + - If all is equal, previous victim will arbitrarily be chosen. */ + if ((next_weight|next_not_pref) < (victim_weight|victim_not_pref)) + { + victim_weight= next_weight; + victim= next; + victim_pos= l; + } + if (next == cycle) + break; + } + + /* Finally, display the deadlock */ + switch (const auto r= static_cast<enum report>(innodb_deadlock_report)) { + case REPORT_OFF: + break; + case REPORT_BASIC: + case REPORT_FULL: + start_print(); + l= 0; + + for (trx_t *next= cycle;;) + { + next= next->lock.wait_trx; + ut_ad(next); + ut_ad(next->state == TRX_STATE_ACTIVE); + const lock_t *wait_lock= next->lock.wait_lock; + ut_ad(wait_lock); + snprintf(buf, sizeof buf, "\n*** (%u) TRANSACTION:\n", ++l); + print(buf); + print(*next); + print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n"); + print(*wait_lock); + if (r == REPORT_BASIC); + else if (wait_lock->is_table()) + { + if (const lock_t *lock= + UT_LIST_GET_FIRST(wait_lock->un_member.tab_lock.table->locks)) + { + ut_ad(!lock->is_waiting()); + print("*** CONFLICTING WITH:\n"); + do + print(*lock); + while ((lock= UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) && + !lock->is_waiting()); + } + else + ut_ad("no conflicting table lock found" == 0); + } + else + { + const page_id_t id{wait_lock->un_member.rec_lock.page_id}; + hash_cell_t &cell= *(wait_lock->type_mode & LOCK_PREDICATE + ? lock_sys.prdt_hash : lock_sys.rec_hash). 
+            cell_get(id.fold());
+          if (const lock_t *lock= lock_sys_t::get_first(cell, id))
+          {
+            const ulint heap_no= lock_rec_find_set_bit(wait_lock);
+            if (!lock_rec_get_nth_bit(lock, heap_no))
+              lock= lock_rec_get_next_const(heap_no, lock);
+            ut_ad(!lock->is_waiting());
+            print("*** CONFLICTING WITH:\n");
+            do
+              print(*lock);
+            while ((lock= lock_rec_get_next_const(heap_no, lock)) &&
+                   !lock->is_waiting());
+          }
+          else
+            ut_ad("no conflicting record lock found" == 0);
+        }
+        if (next == cycle)
+          break;
+      }
+      snprintf(buf, sizeof buf, rollback_msg, victim_pos);
+      print(buf);
+    }
+
+    ut_ad(victim->state == TRX_STATE_ACTIVE);
+
+    /* victim->lock.was_chosen_as_deadlock_victim must always be set before
+    releasing waiting locks and resetting trx->lock.wait_lock */
+    victim->lock.was_chosen_as_deadlock_victim= true;
+    DEBUG_SYNC_C("deadlock_report_before_lock_releasing");
+    lock_cancel_waiting_and_release<true>(victim->lock.wait_lock);
+#ifdef WITH_WSREP
+    if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd))
+      wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd);
+#endif
+  }
+
+func_exit:
+    if (current_trx)
+      lock_sys.wr_unlock();
+    return victim;
+  }
+}
+
+/** Check if a lock request results in a deadlock.
+Resolve a deadlock by choosing a transaction that will be rolled back.
+@param trx transaction requesting a lock
+@param wait_lock the lock being requested
+@return the lock that trx is or was waiting for
+@retval nullptr if the lock wait was resolved
+@retval -1 if trx must report DB_DEADLOCK */
+static lock_t *Deadlock::check_and_resolve(trx_t *trx, lock_t *wait_lock)
+{
+  mysql_mutex_assert_owner(&lock_sys.wait_mutex);
+
+  ut_ad(!trx->mutex_is_owner());
+  ut_ad(trx->state == TRX_STATE_ACTIVE);
+  ut_ad(!srv_read_only_mode);
+  ut_ad(wait_lock);
+
+  if (!innodb_deadlock_detect)
+    return wait_lock;
+
+  if (UNIV_LIKELY_NULL(find_cycle(trx)))
+  {
+    if (report(trx, true) == trx)
+      return reinterpret_cast<lock_t*>(-1);
+    /* Because report() released and reacquired lock_sys.wait_mutex,
+    another thread may have cleared trx->lock.wait_lock meanwhile. */
+    wait_lock= trx->lock.wait_lock;
+  }
+
+  if (UNIV_LIKELY(!trx->lock.was_chosen_as_deadlock_victim))
+    return wait_lock;
+
+  if (wait_lock)
+    lock_sys_t::cancel<false>(trx, wait_lock);
+
+  lock_sys.deadlock_check();
+  return reinterpret_cast<lock_t*>(-1);
+}
+
+/** Check for deadlocks while holding only lock_sys.wait_mutex. 
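+
+As an editorial aside (not upstream text): the victims that this function
+and Deadlock::check_and_resolve() above hand to Deadlock::report() are
+ranked by calc_victim_weight(), which packs the whole preference order
+into one unsigned comparison key:
+
+  bit 63: set when thd_deadlock_victim_preference() disfavours the trx
+  bit 62: set when the trx modified non-transactional tables
+  bits 1-61: TRX_WEIGHT (the number of locks held plus undo log records)
+  bit 0: set for every trx other than the one that initiated the check
+
+For example, with TRX_WEIGHT 5 and no non-transactional changes, the
+initiating trx gets key (5 << 1) = 10 while an otherwise equal peer gets
+11, so the initiator loses the tie and is chosen as the victim.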
*/ +TRANSACTIONAL_TARGET +void lock_sys_t::deadlock_check() +{ + ut_ad(!is_writer()); + mysql_mutex_assert_owner(&wait_mutex); + bool acquired= false; +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + bool elided= false; +#endif + + if (Deadlock::to_be_checked) + { + for (;;) + { + auto i= Deadlock::to_check.begin(); + if (i == Deadlock::to_check.end()) + break; + if (acquired); +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + else if (xbegin()) + { + if (latch.is_locked_or_waiting()) + xabort(); + acquired= elided= true; + } +#endif + else + { + acquired= wr_lock_try(); + if (!acquired) + { + acquired= true; + mysql_mutex_unlock(&wait_mutex); + lock_sys.wr_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&wait_mutex); + continue; + } + } + trx_t *trx= *i; + Deadlock::to_check.erase(i); + if (Deadlock::find_cycle(trx)) + Deadlock::report(trx, false); + } + Deadlock::to_be_checked= false; + } + ut_ad(Deadlock::to_check.empty()); +#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC + if (elided) + return; +#endif + if (acquired) + wr_unlock(); +} + +/** Update the locks when a page is split and merged to two pages, +in defragmentation. */ +void lock_update_split_and_merge( + const buf_block_t* left_block, /*!< in: left page to which merged */ + const rec_t* orig_pred, /*!< in: original predecessor of + supremum on the left page before merge*/ + const buf_block_t* right_block) /*!< in: right page from which merged */ +{ + ut_ad(page_is_leaf(left_block->page.frame)); + ut_ad(page_is_leaf(right_block->page.frame)); + ut_ad(page_align(orig_pred) == left_block->page.frame); + + const page_id_t l{left_block->page.id()}; + const page_id_t r{right_block->page.id()}; + const rec_t *left_next_rec= page_rec_get_next_const(orig_pred); + if (UNIV_UNLIKELY(!left_next_rec)) + { + ut_ad("corrupted page" == 0); + return; + } + ut_ad(!page_rec_is_metadata(left_next_rec)); + + /* This would likely be too large for a memory transaction. */ + LockMultiGuard g{lock_sys.rec_hash, l, r}; + + /* Inherit the locks on the supremum of the left page to the + first record which was moved from the right page */ + lock_rec_inherit_to_gap(g.cell1(), l, g.cell1(), l, left_block->page.frame, + page_rec_get_heap_no(left_next_rec), + PAGE_HEAP_NO_SUPREMUM); + + /* Reset the locks on the supremum of the left page, + releasing waiting transactions */ + lock_rec_reset_and_release_wait(g.cell1(), l, PAGE_HEAP_NO_SUPREMUM); + + /* Inherit the locks to the supremum of the left page from the + successor of the infimum on the right page */ + lock_rec_inherit_to_gap(g.cell1(), l, g.cell2(), r, left_block->page.frame, + PAGE_HEAP_NO_SUPREMUM, + lock_get_min_heap_no(right_block)); +} diff --git a/storage/innobase/lock/lock0prdt.cc b/storage/innobase/lock/lock0prdt.cc new file mode 100644 index 00000000..29756591 --- /dev/null +++ b/storage/innobase/lock/lock0prdt.cc @@ -0,0 +1,928 @@ +/***************************************************************************** + +Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, 2022, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file lock/lock0prdt.cc +The transaction lock system + +Created 9/7/2013 Jimmy Yang +*******************************************************/ + +#define LOCK_MODULE_IMPLEMENTATION + +#include "lock0lock.h" +#include "lock0priv.h" +#include "lock0prdt.h" +#include "dict0mem.h" +#include "que0que.h" + +/*********************************************************************//** +Get a minimum bounding box from a Predicate +@return the minimum bounding box */ +UNIV_INLINE +rtr_mbr_t* +prdt_get_mbr_from_prdt( +/*===================*/ + const lock_prdt_t* prdt) /*!< in: the lock predicate */ +{ + rtr_mbr_t* mbr_loc = reinterpret_cast<rtr_mbr_t*>(prdt->data); + + return(mbr_loc); +} + +/*********************************************************************//** +Get a predicate from a lock +@return the predicate */ +lock_prdt_t* +lock_get_prdt_from_lock( +/*====================*/ + const lock_t* lock) /*!< in: the lock */ +{ + lock_prdt_t* prdt = reinterpret_cast<lock_prdt_t*>( + &((reinterpret_cast<byte*>( + const_cast<lock_t*>(&lock[1])))[ + UNIV_WORD_SIZE])); + + return(prdt); +} + +/*********************************************************************//** +Get a minimum bounding box directly from a lock +@return the minimum bounding box*/ +UNIV_INLINE +rtr_mbr_t* +lock_prdt_get_mbr_from_lock( +/*========================*/ + const lock_t* lock) /*!< in: the lock */ +{ + ut_ad(lock->type_mode & LOCK_PREDICATE); + + lock_prdt_t* prdt = lock_get_prdt_from_lock(lock); + + rtr_mbr_t* mbr_loc = prdt_get_mbr_from_prdt(prdt); + + return(mbr_loc); +} + +/*********************************************************************//** +Append a predicate to the lock */ +void +lock_prdt_set_prdt( +/*===============*/ + lock_t* lock, /*!< in: lock */ + const lock_prdt_t* prdt) /*!< in: Predicate */ +{ + ut_ad(lock->type_mode & LOCK_PREDICATE); + + memcpy(&(((byte*) &lock[1])[UNIV_WORD_SIZE]), prdt, sizeof *prdt); +} + + +/** Check whether two predicate locks are compatible with each other +@param[in] prdt1 first predicate lock +@param[in] prdt2 second predicate lock +@param[in] op predicate comparison operator +@return true if consistent */ +static +bool +lock_prdt_consistent( + lock_prdt_t* prdt1, + lock_prdt_t* prdt2, + ulint op) +{ + bool ret = false; + rtr_mbr_t* mbr1 = prdt_get_mbr_from_prdt(prdt1); + rtr_mbr_t* mbr2 = prdt_get_mbr_from_prdt(prdt2); + ulint action; + + if (op) { + action = op; + } else { + if (prdt2->op != 0 && (prdt1->op != prdt2->op)) { + return(false); + } + + action = prdt1->op; + } + + switch (action) { + case PAGE_CUR_CONTAIN: + ret = MBR_CONTAIN_CMP(mbr1, mbr2); + break; + case PAGE_CUR_DISJOINT: + ret = MBR_DISJOINT_CMP(mbr1, mbr2); + break; + case PAGE_CUR_MBR_EQUAL: + ret = MBR_EQUAL_CMP(mbr1, mbr2); + break; + case PAGE_CUR_INTERSECT: + ret = MBR_INTERSECT_CMP(mbr1, mbr2); + break; + case PAGE_CUR_WITHIN: + ret = MBR_WITHIN_CMP(mbr1, mbr2); + break; + default: + ib::error() << "invalid operator " << action; + ut_error; + } + + return(ret); +} + +/*********************************************************************//** +Checks if a predicate lock request for a new lock has to wait for +another lock. 
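+
+As an editorial illustration (hypothetical values, not upstream code):
+with the PAGE_CUR_INTERSECT operator, only overlapping rectangles can
+conflict, as evaluated by lock_prdt_consistent() above:
+
+  rtr_mbr_t a; a.xmin= 0;  a.xmax= 10; a.ymin= 0;  a.ymax= 10;
+  rtr_mbr_t b; b.xmin= 5;  b.xmax= 15; b.ymin= 5;  b.ymax= 15;
+  rtr_mbr_t c; c.xmin= 20; c.xmax= 30; c.ymin= 20; c.ymax= 30;
+  ut_ad(MBR_INTERSECT_CMP(&a, &b));   // a and b overlap: may have to wait
+  ut_ad(!MBR_INTERSECT_CMP(&a, &c));  // a and c are disjoint: no wait
+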
+@return true if the new lock has to wait for lock2 to be released */
+bool
+lock_prdt_has_to_wait(
+/*==================*/
+  const trx_t* trx,   /*!< in: trx of new lock */
+  unsigned type_mode, /*!< in: precise mode of the new lock
+                      to set: LOCK_S or LOCK_X, possibly
+                      ORed to LOCK_PREDICATE or LOCK_PRDT_PAGE,
+                      LOCK_INSERT_INTENTION */
+  lock_prdt_t* prdt,  /*!< in: lock predicate to check */
+  const lock_t* lock2) /*!< in: another record lock; NOTE that
+                       it is assumed that this has a lock bit
+                       set on the same record as in the new
+                       lock we are setting */
+{
+  lock_prdt_t* cur_prdt = lock_get_prdt_from_lock(lock2);
+
+  ut_ad(trx && lock2);
+  ut_ad((lock2->type_mode & LOCK_PREDICATE && type_mode & LOCK_PREDICATE)
+        || (lock2->type_mode & LOCK_PRDT_PAGE
+            && type_mode & LOCK_PRDT_PAGE));
+
+  ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+  if (trx != lock2->trx
+      && !lock_mode_compatible(static_cast<lock_mode>(
+                               LOCK_MODE_MASK & type_mode),
+                               lock2->mode())) {
+
+    /* If it is a page lock, then return true (conflict) */
+    if (type_mode & LOCK_PRDT_PAGE) {
+      ut_ad(lock2->type_mode & LOCK_PRDT_PAGE);
+
+      return(true);
+    }
+
+    /* A predicate lock does not conflict with a
+    non-predicate lock */
+    if (!(lock2->type_mode & LOCK_PREDICATE)) {
+      return(false);
+    }
+
+    ut_ad(lock2->type_mode & LOCK_PREDICATE);
+
+    if (!(type_mode & LOCK_INSERT_INTENTION)) {
+      /* PREDICATE locks without the LOCK_INSERT_INTENTION
+      flag do not need to wait for anything. This is because
+      different users can have conflicting lock types
+      on predicates. */
+
+      return(false);
+    }
+
+    if (lock2->type_mode & LOCK_INSERT_INTENTION) {
+
+      /* No lock request needs to wait for an insert
+      intention lock to be removed. This makes it similar
+      to a GAP lock, which allows conflicting insert
+      intention locks */
+      return(false);
+    }
+
+    if (!lock_prdt_consistent(cur_prdt, prdt, 0)) {
+      return(false);
+    }
+
+    return(true);
+  }
+
+  return(false);
+}
+
+/*********************************************************************//**
+Checks if a transaction has a GRANTED stronger or equal predicate lock
+on the page.
+@return lock or NULL */
+UNIV_INLINE
+lock_t*
+lock_prdt_has_lock(
+/*===============*/
+  ulint precise_mode,  /*!< in: LOCK_S or LOCK_X */
+  hash_cell_t& cell,   /*!< hash table cell of id */
+  const page_id_t id,  /*!< in: page identifier */
+  lock_prdt_t* prdt,   /*!< in: the predicate to be
+                       attached to the new lock */
+  const trx_t* trx)    /*!< in: transaction */
+{
+  ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
+        || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
+  ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
+
+  for (lock_t* lock= lock_sys_t::get_first(cell, id, PRDT_HEAPNO);
+       lock;
+       lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
+    ut_ad(lock->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+    if (lock->trx == trx
+        && !(lock->type_mode & (LOCK_INSERT_INTENTION | LOCK_WAIT))
+        && lock_mode_stronger_or_eq(
+             lock->mode(),
+             static_cast<lock_mode>(
+               precise_mode & LOCK_MODE_MASK))) {
+      if (lock->type_mode & LOCK_PRDT_PAGE) {
+        return(lock);
+      }
+
+      lock_prdt_t* cur_prdt = lock_get_prdt_from_lock(
+        lock);
+
+      /* If the lock's predicate operator is the same
+      as the one we are looking for, and the predicate
+      test succeeds, then we have found a matching lock */
+      if (cur_prdt->op == prdt->op
+          && lock_prdt_consistent(cur_prdt, prdt, 0)) {
+
+        return(lock);
+      }
+    }
+  }
+
+  return(NULL);
+}
+
+/*********************************************************************//**
+Checks if some other transaction has a conflicting predicate
+lock request in the queue, so 
that we have to wait.
+@return lock or NULL */
+static
+lock_t*
+lock_prdt_other_has_conflicting(
+/*============================*/
+  unsigned mode,            /*!< in: LOCK_S or LOCK_X,
+                            possibly ORed to LOCK_PREDICATE or
+                            LOCK_PRDT_PAGE, LOCK_INSERT_INTENTION */
+  const hash_cell_t& cell,  /*!< in: hash table cell */
+  const page_id_t id,       /*!< in: page identifier */
+  lock_prdt_t* prdt,        /*!< in: predicate (currently the
+                            Minimum Bounding Rectangle) the
+                            new lock will be on */
+  const trx_t* trx)         /*!< in: our transaction */
+{
+  for (lock_t* lock = lock_sys_t::get_first(cell, id, PRDT_HEAPNO);
+       lock != NULL;
+       lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
+
+    if (lock->trx == trx) {
+      continue;
+    }
+
+    if (lock_prdt_has_to_wait(trx, mode, prdt, lock)) {
+      return(lock);
+    }
+  }
+
+  return(NULL);
+}
+
+/*********************************************************************//**
+Enlarge the Minimum Bounding Rectangle of a lock so that it also covers
+the given rectangle */
+static
+void
+lock_prdt_enlarge_mbr(
+/*==================*/
+  const lock_t* lock, /*!< in/out: lock to modify */
+  rtr_mbr_t* mbr)     /*!< in: Minimum Bounding Rectangle */
+{
+  rtr_mbr_t* cur_mbr = lock_prdt_get_mbr_from_lock(lock);
+
+  if (cur_mbr->xmin > mbr->xmin) {
+    cur_mbr->xmin = mbr->xmin;
+  }
+
+  if (cur_mbr->ymin > mbr->ymin) {
+    cur_mbr->ymin = mbr->ymin;
+  }
+
+  if (cur_mbr->xmax < mbr->xmax) {
+    cur_mbr->xmax = mbr->xmax;
+  }
+
+  if (cur_mbr->ymax < mbr->ymax) {
+    cur_mbr->ymax = mbr->ymax;
+  }
+}
+
+/*********************************************************************//**
+Enlarge the predicate of a lock to a covering (larger) predicate */
+static
+void
+lock_prdt_enlarge_prdt(
+/*===================*/
+  lock_t* lock,      /*!< in/out: lock to modify */
+  lock_prdt_t* prdt) /*!< in: predicate */
+{
+  rtr_mbr_t* mbr = prdt_get_mbr_from_prdt(prdt);
+
+  lock_prdt_enlarge_mbr(lock, mbr);
+}
+
+/*********************************************************************//**
+Check whether two predicates' MBRs are the same
+@return true if they are the same */
+static
+bool
+lock_prdt_is_same(
+/*==============*/
+  lock_prdt_t* prdt1, /*!< in: MBR with the lock */
+  lock_prdt_t* prdt2) /*!< in: MBR with the lock */
+{
+  rtr_mbr_t* mbr1 = prdt_get_mbr_from_prdt(prdt1);
+  rtr_mbr_t* mbr2 = prdt_get_mbr_from_prdt(prdt2);
+
+  if (prdt1->op == prdt2->op && MBR_EQUAL_CMP(mbr1, mbr2)) {
+    return(true);
+  }
+
+  return(false);
+}
+
+/*********************************************************************//**
+Looks for a similar predicate lock struct by the same trx on the same page.
+This can be used to save space when a new record lock should be set on a
+page: no new struct is needed if a suitable old one is found. 
+@return lock or NULL */ +static +lock_t* +lock_prdt_find_on_page( +/*===================*/ + unsigned type_mode, /*!< in: lock type_mode field */ + const buf_block_t* block, /*!< in: buffer block */ + lock_prdt_t* prdt, /*!< in: MBR with the lock */ + const trx_t* trx) /*!< in: transaction */ +{ + const page_id_t id{block->page.id()}; + hash_cell_t& cell = *lock_sys.hash_get(type_mode).cell_get(id.fold()); + + for (lock_t *lock = lock_sys_t::get_first(cell, id); + lock != NULL; + lock = lock_rec_get_next_on_page(lock)) { + + if (lock->trx == trx + && lock->type_mode == type_mode) { + if (lock->type_mode & LOCK_PRDT_PAGE) { + return(lock); + } + + ut_ad(lock->type_mode & LOCK_PREDICATE); + + if (lock_prdt_is_same(lock_get_prdt_from_lock(lock), + prdt)) { + return(lock); + } + } + } + + return(NULL); +} + +/*********************************************************************//** +Adds a predicate lock request in the predicate lock queue. +@return lock where the bit was set */ +static +lock_t* +lock_prdt_add_to_queue( +/*===================*/ + unsigned type_mode,/*!< in: lock mode, wait, predicate + etc. flags */ + const buf_block_t* block, /*!< in: buffer block containing + the record */ + dict_index_t* index, /*!< in: index of record */ + trx_t* trx, /*!< in/out: transaction */ + lock_prdt_t* prdt, /*!< in: Minimum Bounding Rectangle + the new lock will be on */ + bool caller_owns_trx_mutex) + /*!< in: TRUE if caller owns the + transaction mutex */ +{ + ut_ad(caller_owns_trx_mutex == trx->mutex_is_owner()); + ut_ad(index->is_spatial()); + ut_ad(!dict_index_is_online_ddl(index)); + ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)); + +#ifdef UNIV_DEBUG + switch (type_mode & LOCK_MODE_MASK) { + case LOCK_X: + case LOCK_S: + break; + default: + ut_error; + } +#endif /* UNIV_DEBUG */ + + /* Try to extend a similar non-waiting lock on the same page */ + if (!(type_mode & LOCK_WAIT)) { + const page_id_t id{block->page.id()}; + hash_cell_t& cell = *lock_sys.hash_get(type_mode). + cell_get(id.fold()); + + for (lock_t* lock = lock_sys_t::get_first(cell, id); + lock; lock = lock_rec_get_next_on_page(lock)) { + if (lock->is_waiting() + && lock->type_mode + & (LOCK_PREDICATE | LOCK_PRDT_PAGE) + && lock_rec_get_nth_bit(lock, PRDT_HEAPNO)) { + goto create; + } + } + + if (lock_t* lock = lock_prdt_find_on_page(type_mode, block, + prdt, trx)) { + if (lock->type_mode & LOCK_PREDICATE) { + lock_prdt_enlarge_prdt(lock, prdt); + } + + return lock; + } + } + +create: + /* Note: We will not pass any conflicting lock to lock_rec_create(), + because we should be moving an existing waiting lock request. */ + ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx); + + lock_t* lock = lock_rec_create(nullptr, + type_mode, block, PRDT_HEAPNO, index, + trx, caller_owns_trx_mutex); + + if (lock->type_mode & LOCK_PREDICATE) { + lock_prdt_set_prdt(lock, prdt); + } + + return lock; +} + +/*********************************************************************//** +Checks if locks of other transactions prevent an immediate insert of +a predicate record. 
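+
+As an editorial sketch (hypothetical caller and variables, not upstream
+code), the R-tree insert path would set up the predicate from the entry's
+MBR and invoke this function before performing the insert:
+
+  lock_prdt_t prdt;
+  // mbr is an rtr_mbr_t computed from the entry being inserted;
+  // with a NULL heap the predicate simply points at mbr.
+  lock_init_prdt_from_mbr(&prdt, &mbr, 0, NULL);
+  dberr_t err= lock_prdt_insert_check_and_lock(rec, block, index, thr,
+                                               mtr, &prdt);
+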
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_prdt_insert_check_and_lock(
+/*============================*/
+  const rec_t* rec,     /*!< in: record after which to insert */
+  buf_block_t* block,   /*!< in/out: buffer block of rec */
+  dict_index_t* index,  /*!< in: index */
+  que_thr_t* thr,       /*!< in: query thread */
+  mtr_t* mtr,           /*!< in/out: mini-transaction */
+  lock_prdt_t* prdt)    /*!< in: predicate with the Minimum
+                        Bounding Rectangle */
+{
+  ut_ad(block->page.frame == page_align(rec));
+  ut_ad(!index->table->is_temporary());
+  ut_ad(index->is_spatial());
+
+  trx_t *trx= thr_get_trx(thr);
+  const page_id_t id{block->page.id()};
+  dberr_t err= DB_SUCCESS;
+
+  {
+    LockGuard g{lock_sys.prdt_hash, id};
+    /* Because this code is invoked for a running transaction by
+    the thread that is serving the transaction, it is not necessary
+    to hold trx->mutex here. */
+    ut_ad(lock_table_has(trx, index->table, LOCK_IX));
+
+    /* Only need to check locks on prdt_hash */
+    if (ut_d(lock_t *lock=) lock_sys_t::get_first(g.cell(), id, PRDT_HEAPNO))
+    {
+      ut_ad(lock->type_mode & LOCK_PREDICATE);
+
+      /* If another transaction has an explicit lock request which locks
+      the predicate, waiting or granted, on the successor, the insert
+      has to wait.
+
+      Similar to GAP locks, we do not consider locks from inserts to
+      conflict with each other */
+
+      const ulint mode= LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION;
+      lock_t *c_lock= lock_prdt_other_has_conflicting(mode, g.cell(), id,
+                                                      prdt, trx);
+
+      if (c_lock)
+      {
+        rtr_mbr_t *mbr= prdt_get_mbr_from_prdt(prdt);
+        trx->mutex_lock();
+        /* Allocate the MBR on the lock heap */
+        lock_init_prdt_from_mbr(prdt, mbr, 0, trx->lock.lock_heap);
+        err= lock_rec_enqueue_waiting(c_lock, mode, id, block->page.frame,
+                                      PRDT_HEAPNO, index, thr, prdt);
+        trx->mutex_unlock();
+      }
+    }
+  }
+
+  if (err == DB_SUCCESS)
+    /* Update the page max trx id field */
+    page_update_max_trx_id(block, buf_block_get_page_zip(block), trx->id, mtr);
+
+  return err;
+}
+
+/**************************************************************//**
+Check whether any predicate lock in the parent needs to propagate to a
+child page after a split. 
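+
+As an editorial illustration (hypothetical MBRs, not upstream text): if
+the parent held a predicate lock on the rectangle (0,0)-(10,10) and the
+split produces a left child covering (0,0)-(10,5) and a right child
+covering (0,5)-(10,10), the lock's MBR intersects both children, so it is
+re-queued on both of them via lock_prdt_add_to_queue() below.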
*/ +void +lock_prdt_update_parent( +/*====================*/ + buf_block_t* left_block, /*!< in/out: page to be split */ + buf_block_t* right_block, /*!< in/out: the new half page */ + lock_prdt_t* left_prdt, /*!< in: MBR on the old page */ + lock_prdt_t* right_prdt, /*!< in: MBR on the new page */ + const page_id_t page_id) /*!< in: parent page */ +{ + auto fold= page_id.fold(); + LockMutexGuard g{SRW_LOCK_CALL}; + hash_cell_t& cell = *lock_sys.prdt_hash.cell_get(fold); + + /* Get all locks in parent */ + for (lock_t *lock = lock_sys_t::get_first(cell, page_id); + lock; + lock = lock_rec_get_next_on_page(lock)) { + lock_prdt_t* lock_prdt; + ulint op = PAGE_CUR_DISJOINT; + + ut_ad(lock); + + if (!(lock->type_mode & LOCK_PREDICATE) + || (lock->type_mode & LOCK_MODE_MASK) == LOCK_X) { + continue; + } + + lock_prdt = lock_get_prdt_from_lock(lock); + + /* Check each lock in parent to see if it intersects with + left or right child */ + if (!lock_prdt_consistent(lock_prdt, left_prdt, op) + && !lock_prdt_find_on_page(lock->type_mode, left_block, + lock_prdt, lock->trx)) { + lock_prdt_add_to_queue(lock->type_mode, + left_block, lock->index, + lock->trx, lock_prdt, + false); + } + + if (!lock_prdt_consistent(lock_prdt, right_prdt, op) + && !lock_prdt_find_on_page(lock->type_mode, right_block, + lock_prdt, lock->trx)) { + lock_prdt_add_to_queue(lock->type_mode, right_block, + lock->index, lock->trx, + lock_prdt, false); + } + } +} + +/**************************************************************//** +Update predicate lock when page splits */ +static +void +lock_prdt_update_split_low( +/*=======================*/ + buf_block_t* new_block, /*!< in/out: the new half page */ + lock_prdt_t* prdt, /*!< in: MBR on the old page */ + lock_prdt_t* new_prdt, /*!< in: MBR on the new page */ + const page_id_t id, /*!< in: page number */ + unsigned type_mode) /*!< in: LOCK_PREDICATE or + LOCK_PRDT_PAGE */ +{ + hash_cell_t& cell = *lock_sys.hash_get(type_mode).cell_get(id.fold()); + + for (lock_t* lock = lock_sys_t::get_first(cell, id); + lock; + lock = lock_rec_get_next_on_page(lock)) { + /* First dealing with Page Lock */ + if (lock->type_mode & LOCK_PRDT_PAGE) { + /* Duplicate the lock to new page */ + lock_prdt_add_to_queue(lock->type_mode, + new_block, + lock->index, + lock->trx, nullptr, false); + continue; + } + + /* Now dealing with Predicate Lock */ + lock_prdt_t* lock_prdt; + ulint op = PAGE_CUR_DISJOINT; + + ut_ad(lock->type_mode & LOCK_PREDICATE); + + /* No need to duplicate waiting X locks */ + if ((lock->type_mode & LOCK_MODE_MASK) == LOCK_X) { + continue; + } + + lock_prdt = lock_get_prdt_from_lock(lock); + + if (!lock_prdt_consistent(lock_prdt, new_prdt, op)) { + /* Move the lock to new page */ + lock_prdt_add_to_queue(lock->type_mode, new_block, + lock->index, lock->trx, + lock_prdt, false); + } + } +} + +/**************************************************************//** +Update predicate lock when page splits */ +void +lock_prdt_update_split( +/*===================*/ + buf_block_t* new_block, /*!< in/out: the new half page */ + lock_prdt_t* prdt, /*!< in: MBR on the old page */ + lock_prdt_t* new_prdt, /*!< in: MBR on the new page */ + const page_id_t page_id) /*!< in: page number */ +{ + LockMutexGuard g{SRW_LOCK_CALL}; + lock_prdt_update_split_low(new_block, prdt, new_prdt, + page_id, LOCK_PREDICATE); + + lock_prdt_update_split_low(new_block, NULL, NULL, + page_id, LOCK_PRDT_PAGE); +} + +/*********************************************************************//** +Initiate a Predicate Lock 
from an MBR */
+void
+lock_init_prdt_from_mbr(
+/*====================*/
+  lock_prdt_t* prdt,  /*!< in/out: predicate to be initialized */
+  rtr_mbr_t* mbr,     /*!< in: Minimum Bounding Rectangle */
+  ulint mode,         /*!< in: search mode */
+  mem_heap_t* heap)   /*!< in: heap for allocating memory */
+{
+  memset(prdt, 0, sizeof(*prdt));
+
+  if (heap != NULL) {
+    prdt->data = mem_heap_dup(heap, mbr, sizeof *mbr);
+  } else {
+    prdt->data = static_cast<void*>(mbr);
+  }
+
+  prdt->op = static_cast<uint16>(mode);
+}
+
+/*********************************************************************//**
+Acquire a predicate lock on a block
+@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
+dberr_t
+lock_prdt_lock(
+/*===========*/
+  buf_block_t* block,   /*!< in/out: buffer block of rec */
+  lock_prdt_t* prdt,    /*!< in: predicate for the lock */
+  dict_index_t* index,  /*!< in: secondary index */
+  lock_mode mode,       /*!< in: mode of the lock which
+                        the read cursor should set on
+                        records: LOCK_S or LOCK_X; the
+                        latter is possible in
+                        SELECT FOR UPDATE */
+  unsigned type_mode,
+                        /*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
+  que_thr_t* thr)       /*!< in: query thread
+                        (can be NULL if BTR_NO_LOCKING_FLAG) */
+{
+  trx_t* trx = thr_get_trx(thr);
+  dberr_t err = DB_SUCCESS;
+  lock_rec_req_status status = LOCK_REC_SUCCESS;
+
+  if (trx->read_only || index->table->is_temporary()) {
+    return(DB_SUCCESS);
+  }
+
+  ut_ad(!dict_index_is_clust(index));
+  ut_ad(!dict_index_is_online_ddl(index));
+  ut_ad(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
+
+  auto& hash = lock_sys.prdt_hash_get(type_mode != LOCK_PREDICATE);
+  const page_id_t id{block->page.id()};
+
+  /* Another transaction cannot have an implicit lock on the record,
+  because when we come here, we already have modified the clustered
+  index record, and this would not have been possible if another active
+  transaction had modified this secondary index record. 
*/ + + LockGuard g{hash, id}; + + const unsigned prdt_mode = type_mode | mode; + lock_t* lock = lock_sys_t::get_first(g.cell(), id); + + if (lock == NULL) { + lock = lock_rec_create( + NULL, + prdt_mode, block, PRDT_HEAPNO, + index, trx, FALSE); + + status = LOCK_REC_SUCCESS_CREATED; + } else { + if (lock_rec_get_next_on_page(lock) + || lock->trx != trx + || lock->type_mode != prdt_mode + || lock_rec_get_n_bits(lock) == 0 + || ((type_mode & LOCK_PREDICATE) + && (!lock_prdt_consistent( + lock_get_prdt_from_lock(lock), prdt, 0)))) { + trx->mutex_lock(); + + lock = lock_prdt_has_lock( + mode, g.cell(), id, prdt, trx); + + if (lock) { + } else if (lock_t* wait_for + = lock_prdt_other_has_conflicting( + prdt_mode, g.cell(), id, prdt, + trx)) { + err = lock_rec_enqueue_waiting( + wait_for, prdt_mode, id, + block->page.frame, PRDT_HEAPNO, + index, thr, prdt); + } else { + lock_prdt_add_to_queue( + prdt_mode, block, index, trx, + prdt, true); + } + + trx->mutex_unlock(); + } else { + if (!lock_rec_get_nth_bit(lock, PRDT_HEAPNO)) { + lock_rec_set_nth_bit(lock, PRDT_HEAPNO); + status = LOCK_REC_SUCCESS_CREATED; + } + } + } + + if (status == LOCK_REC_SUCCESS_CREATED && type_mode == LOCK_PREDICATE) { + /* Append the predicate in the lock record */ + lock_prdt_set_prdt(lock, prdt); + } + + return(err); +} + +/*********************************************************************//** +Acquire a "Page" lock on a block +@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ +dberr_t +lock_place_prdt_page_lock( + const page_id_t page_id, /*!< in: page identifier */ + dict_index_t* index, /*!< in: secondary index */ + que_thr_t* thr) /*!< in: query thread */ +{ + ut_ad(thr != NULL); + ut_ad(!high_level_read_only); + + ut_ad(index->is_spatial()); + ut_ad(!dict_index_is_online_ddl(index)); + if (index->table->is_temporary()) { + return DB_SUCCESS; + } + + /* Another transaction cannot have an implicit lock on the record, + because when we come here, we already have modified the clustered + index record, and this would not have been possible if another active + transaction had modified this secondary index record. */ + + LockGuard g{lock_sys.prdt_page_hash, page_id}; + + const lock_t* lock = lock_sys_t::get_first(g.cell(), page_id); + const ulint mode = LOCK_S | LOCK_PRDT_PAGE; + trx_t* trx = thr_get_trx(thr); + + if (lock != NULL) { + /* Find a matching record lock owned by this transaction. */ + + while (lock != NULL && lock->trx != trx) { + lock = lock_rec_get_next_on_page_const(lock); + } + + ut_ad(lock == NULL || lock->type_mode == mode); + ut_ad(lock == NULL || lock_rec_get_n_bits(lock) != 0); + } + + if (lock == NULL) { + lock = lock_rec_create_low( + NULL, + mode, page_id, NULL, PRDT_HEAPNO, + index, trx, FALSE); + +#ifdef PRDT_DIAG + printf("GIS_DIAGNOSTIC: page lock %d\n", (int) page_no); +#endif /* PRDT_DIAG */ + } + + return(DB_SUCCESS); +} + +/** Check whether there are R-tree Page lock on a page +@param[in] trx trx to test the lock +@param[in] page_id page identifier +@return true if there is none */ +bool lock_test_prdt_page_lock(const trx_t *trx, const page_id_t page_id) +{ + LockGuard g{lock_sys.prdt_page_hash, page_id}; + lock_t *lock= lock_sys_t::get_first(g.cell(), page_id); + return !lock || trx == lock->trx; +} + +/*************************************************************//** +Moves the locks of a page to another page and resets the lock bits of +the donating records. 
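+
+(Editorial note, hypothetical call site: after the page contents have
+been copied, a caller would invoke, e.g.,
+lock_prdt_rec_move(new_block, old_page_id); any waiting request keeps
+its LOCK_WAIT status in the entry re-created on the receiving page.)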
*/
+void
+lock_prdt_rec_move(
+/*===============*/
+  const buf_block_t* receiver, /*!< in: buffer block containing
+                               the receiving record */
+  const page_id_t donator)     /*!< in: page from which the locks
+                               are moved */
+{
+  LockMultiGuard g{lock_sys.prdt_hash, receiver->page.id(), donator};
+
+  for (lock_t *lock = lock_sys_t::get_first(g.cell2(), donator,
+                                            PRDT_HEAPNO);
+       lock;
+       lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
+
+    const auto type_mode = lock->type_mode;
+    lock_prdt_t* lock_prdt = lock_get_prdt_from_lock(lock);
+
+    lock_rec_reset_nth_bit(lock, PRDT_HEAPNO);
+    if (type_mode & LOCK_WAIT) {
+      ut_ad(lock->trx->lock.wait_lock == lock);
+      lock->type_mode &= ~LOCK_WAIT;
+    }
+    lock_prdt_add_to_queue(
+      type_mode, receiver, lock->index, lock->trx,
+      lock_prdt, false);
+  }
+}
+
+/** Remove locks on a discarded SPATIAL INDEX page.
+@param id page to be discarded
+@param all whether to discard also from lock_sys.prdt_hash */
+void lock_sys_t::prdt_page_free_from_discard(const page_id_t id, bool all)
+{
+  const auto id_fold= id.fold();
+  rd_lock(SRW_LOCK_CALL);
+  auto cell= prdt_page_hash.cell_get(id_fold);
+  auto latch= hash_table::latch(cell);
+  latch->acquire();
+
+  for (lock_t *lock= get_first(*cell, id), *next; lock; lock= next)
+  {
+    next= lock_rec_get_next_on_page(lock);
+    lock_rec_discard(prdt_page_hash, lock);
+  }
+
+  if (all)
+  {
+    latch->release();
+    cell= prdt_hash.cell_get(id_fold);
+    latch= hash_table::latch(cell);
+    latch->acquire();
+    for (lock_t *lock= get_first(*cell, id), *next; lock; lock= next)
+    {
+      next= lock_rec_get_next_on_page(lock);
+      lock_rec_discard(prdt_hash, lock);
+    }
+  }
+
+  latch->release();
+  cell= rec_hash.cell_get(id_fold);
+  latch= hash_table::latch(cell);
+  latch->acquire();
+
+  for (lock_t *lock= get_first(*cell, id), *next; lock; lock= next)
+  {
+    next= lock_rec_get_next_on_page(lock);
+    lock_rec_discard(rec_hash, lock);
+  }
+
+  latch->release();
+  /* Must be last, to avoid a race with lock_sys_t::hash_table::resize() */
+  rd_unlock();
+}
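+
+/* As an editorial sketch (hypothetical caller, not part of this patch):
+a page-discard path for a spatial index would empty all three lock hash
+queues for the page in one call, e.g.
+
+  // id is the page_id_t of the R-tree page being discarded;
+  // passing all=true also empties lock_sys.prdt_hash.
+  lock_sys.prdt_page_free_from_discard(id, true);
+*/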