summaryrefslogtreecommitdiffstats
path: root/storage/maria/ma_locking.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/maria/ma_locking.c')
-rw-r--r--storage/maria/ma_locking.c607
1 files changed, 607 insertions, 0 deletions
diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c
new file mode 100644
index 00000000..9084be1d
--- /dev/null
+++ b/storage/maria/ma_locking.c
@@ -0,0 +1,607 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+ Locking of Maria-tables.
+ Must be first request before doing any further calls to any Maria function.
+ Is used to allow many processes to use the same non transactional Maria table
+*/
+
+#include "ma_ftdefs.h"
+
+ /*
+   Lock table by F_UNLCK, F_RDLCK or F_WRLCK
+
+   SYNOPSIS
+     maria_lock_database()
+     info       Handler whose lock state is to be changed
+     lock_type  Requested lock: F_UNLCK, F_RDLCK, F_WRLCK or F_EXTRA_LCK
+
+   NOTES
+     Returns at once, without touching any counter, if the share has
+     HA_OPTION_READ_ONLY_DATA set or if the handler already holds lock_type.
+     F_EXTRA_LCK only increments share->w_locks and share->tot_locks and is
+     handled before share->intern_lock is taken.
+     All other requests run under share->intern_lock (unless
+     info->intern_lock_locked is set) and only do work if the index file
+     descriptor share->kfile.file is valid.
+
+   RETURN
+     0      ok
+     #      error; my_errno of the failing call, or HA_ERR_NO_SUCH_TABLE
+            from the _WIN32 merge table check
+ */
+
+int maria_lock_database(MARIA_HA *info, int lock_type)
+{
+ int error;
+ uint count; /* Locks of the released kind still held; only set for F_UNLCK */
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_lock_database");
+ DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
+ "global_changed: %d open_count: %u name: '%s'",
+ lock_type, info->lock_type, share->r_locks,
+ share->w_locks,
+ share->global_changed, share->state.open_count,
+ share->index_file_name.str));
+ if (share->options & HA_OPTION_READ_ONLY_DATA ||
+ info->lock_type == lock_type)
+ DBUG_RETURN(0); /* Read-only data or lock already held: nothing to do */
+ if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
+ {
+ ++share->w_locks;
+ ++share->tot_locks;
+ info->lock_type= lock_type;
+ DBUG_RETURN(0);
+ }
+
+ error=0;
+ if (!info->intern_lock_locked) /* If set, presumably the caller holds it */
+ mysql_mutex_lock(&share->intern_lock);
+ if (share->kfile.file >= 0) /* May only be false on windows */
+ {
+ switch (lock_type) {
+ case F_UNLCK:
+ maria_ftparser_call_deinitializer(info);
+ if (info->lock_type == F_RDLCK)
+ {
+ count= --share->r_locks; /* Read locks left after this release */
+ if (share->lock_restore_status)
+ (*share->lock_restore_status)(info);
+ }
+ else
+ {
+ count= --share->w_locks; /* Write locks left after this release */
+ if (share->lock.update_status)
+ _ma_update_status_with_lock(info);
+ }
+ --share->tot_locks;
+ if (info->lock_type == F_WRLCK && !share->w_locks) /* Last write lock */
+ {
+ /* pages of transactional tables get flushed at Checkpoint */
+ if (!share->base.born_transactional && !share->temporary &&
+ _ma_flush_table_files(info,
+ share->delay_key_write ? MARIA_FLUSH_DATA :
+ MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
+ FLUSH_KEEP, FLUSH_KEEP))
+ error= my_errno;
+ }
+ if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
+ {
+ if (end_io_cache(&info->rec_cache)) /* Failure here is treated as fatal */
+ {
+ error= my_errno;
+ _ma_set_fatal_error(info, error);
+ }
+ }
+ if (!count) /* No lock of the released kind is left */
+ {
+ DBUG_PRINT("info",("changed: %u w_locks: %u",
+ (uint) share->changed, share->w_locks));
+ if (share->changed && !share->w_locks)
+ {
+#ifdef HAVE_MMAP
+ if ((share->mmaped_length !=
+ share->state.state.data_file_length) &&
+ (share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
+ {
+ if (share->lock_key_trees)
+ mysql_rwlock_wrlock(&share->mmap_lock);
+ _ma_remap_file(info, share->state.state.data_file_length);
+ share->nonmmaped_inserts= 0;
+ if (share->lock_key_trees)
+ mysql_rwlock_unlock(&share->mmap_lock);
+ }
+#endif
+#ifdef MARIA_EXTERNAL_LOCKING
+ share->state.process= share->last_process=share->this_process;
+ share->state.unique= info->last_unique= info->this_unique;
+ share->state.update_count= info->last_loop= ++info->this_loop;
+#endif
+ /* transactional tables rather flush their state at Checkpoint */
+ if (!share->base.born_transactional)
+ {
+ if (_ma_state_info_write_sub(share->kfile.file, &share->state,
+ MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
+ error= my_errno;
+ else
+ {
+ /* A value of 0 below means "state flushed" */
+ share->changed= 0;
+ }
+ }
+ if (maria_flush)
+ {
+ if (_ma_sync_table_files(info))
+ error= my_errno;
+ }
+ else
+ share->not_flushed=1; /* Table files were not synced here */
+ if (error)
+ _ma_set_fatal_error(info, error);
+ }
+ }
+ info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
+ info->lock_type= F_UNLCK;
+ break;
+ case F_RDLCK:
+ if (info->lock_type == F_WRLCK)
+ {
+ /*
+ Change RW to READONLY
+
+ mysqld does not turn write locks to read locks,
+ so we're never here in mysqld.
+
+ tot_locks is unchanged as one lock is just converted.
+ */
+ share->w_locks--;
+ share->r_locks++;
+ info->lock_type=lock_type;
+ break;
+ }
+#ifdef MARIA_EXTERNAL_LOCKING
+ if (!share->r_locks && !share->w_locks)
+ {
+ /* note that a transactional table should not do this */
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ {
+ error=my_errno;
+ break;
+ }
+ }
+#endif
+ _ma_test_if_changed(info);
+ share->r_locks++;
+ share->tot_locks++;
+ info->lock_type=lock_type;
+ break;
+ case F_WRLCK:
+ if (info->lock_type == F_RDLCK)
+ { /* Change READONLY to RW */
+ if (share->r_locks == 1) /* Only one read lock counted: convert it */
+ {
+ share->r_locks--;
+ share->w_locks++;
+ info->lock_type=lock_type;
+ break;
+ }
+ } /* Otherwise fall through and count an additional write lock */
+#ifdef MARIA_EXTERNAL_LOCKING
+ if (!(share->options & HA_OPTION_READ_ONLY_DATA))
+ {
+ if (!share->w_locks)
+ {
+ if (!share->r_locks)
+ {
+ /*
+ Note that transactional tables should not do this.
+ If we enabled this code, we should make sure to skip it if
+ born_transactional is true. We should not test
+ now_transactional to decide if we can call
+ _ma_state_info_read_dsk(), because it can temporarily be 0
+ (TRUNCATE on a partitioned table) and thus it would make a state
+ modification below without mutex, confusing a concurrent
+ checkpoint running.
+ Even if this code was enabled only for non-transactional tables:
+ in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
+ state on disk read by DELETE is obsolete as it was not flushed
+ at the end of INSERT. MyISAM same. It however causes no issue as
+ maria_delete_all_rows() calls _ma_reset_status() thus is not
+ influenced by the obsolete read values.
+ */
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ {
+ error=my_errno;
+ break;
+ }
+ }
+ }
+ }
+#endif /* defined(MARIA_EXTERNAL_LOCKING) */
+ _ma_test_if_changed(info);
+
+ info->lock_type=lock_type;
+ info->invalidator=share->invalidator;
+ share->w_locks++;
+ share->tot_locks++;
+ break;
+ default:
+ DBUG_ASSERT(0);
+ break; /* Impossible */
+ }
+ }
+#ifdef _WIN32
+ else
+ {
+ /*
+ Check for bad file descriptors if this table is part
+ of a merge union. Failing to capture this may cause
+ a crash on windows if the table is renamed and
+ later on referenced by the merge table.
+ */
+ if( info->owned_by_merge && (info->s)->kfile.file < 0 )
+ {
+ error = HA_ERR_NO_SUCH_TABLE;
+ }
+ }
+#endif
+ if (!info->intern_lock_locked) /* Only unlock what was locked above */
+ mysql_mutex_unlock(&share->intern_lock);
+ DBUG_RETURN(error);
+} /* maria_lock_database */
+
+
+/****************************************************************************
+ ** functions to read / write the state
+****************************************************************************/
+/*
+  Refresh the in-memory table state before an operation
+
+  SYNOPSIS
+    _ma_readinfo()
+    info             Maria handler
+    lock_type        Kind of access wanted; F_WRLCK is refused while the
+                     handler holds F_RDLCK
+    check_keybuffer  If set, _ma_test_if_changed() is called after the
+                     state has been refreshed
+
+  NOTES
+    Only does something if compiled with MARIA_EXTERNAL_LOCKING; otherwise
+    all arguments are unused and 0 is returned.
+    When the handler holds no lock (F_UNLCK) and share->tot_locks is 0, the
+    state is read from the index file.
+
+  RETURN
+    0   ok
+    1   state could not be read; my_errno is set (HA_ERR_FILE_TOO_SHORT if
+        the failed read left it as 0)
+    -1  F_WRLCK requested while holding F_RDLCK; my_errno is EACCES
+*/
+int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
+ int lock_type __attribute__ ((unused)),
+ int check_keybuffer __attribute__ ((unused)))
+{
+#ifdef MARIA_EXTERNAL_LOCKING
+ DBUG_ENTER("_ma_readinfo");
+
+ if (info->lock_type == F_UNLCK)
+ {
+ MARIA_SHARE *share= info->s;
+ if (!share->tot_locks) /* Nobody holds a lock on the share */
+ {
+ /* should not be done for transactional tables */
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ {
+ if (!my_errno) /* Make sure the caller gets an error code */
+ my_errno= HA_ERR_FILE_TOO_SHORT;
+ DBUG_RETURN(1);
+ }
+ }
+ if (check_keybuffer)
+ VOID(_ma_test_if_changed(info));
+ info->invalidator=share->invalidator;
+ }
+ else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
+ {
+ my_errno=EACCES; /* Not allowed to change */
+ DBUG_RETURN(-1); /* when have read_lock() */
+ }
+ DBUG_RETURN(0);
+#else
+ return 0;
+#endif /* defined(MARIA_EXTERNAL_LOCKING) */
+} /* _ma_readinfo */
+
+
+/*
+  Finish an update: write the state to the index file or mark the share
+  as changed
+
+  SYNOPSIS
+    _ma_writeinfo()
+    info       Maria handler
+    operation  Non-zero if the table was updated
+
+  NOTES
+    Every function that updates the table MUST end with this request.
+    my_errno is not changed if this succeeds!
+    The state is only written for non transactional tables while nobody
+    holds a lock (share->tot_locks == 0); transactional tables flush their
+    state at Checkpoint. In all other cases an update only sets
+    share->changed.
+
+  RETURN
+    0  ok
+    #  result of _ma_state_info_write_sub(); my_errno holds its error
+*/
+
+int _ma_writeinfo(register MARIA_HA *info, uint operation)
+{
+  int write_result, saved_errno;
+  MARIA_SHARE *share= info->s;
+  DBUG_ENTER("_ma_writeinfo");
+  DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
+                     share->tot_locks));
+
+  if (!operation)
+    DBUG_RETURN(0);                     /* Nothing was updated */
+
+  if (share->tot_locks != 0 || share->base.born_transactional)
+  {
+    share->changed= 1;                  /* Mark keyfile changed */
+    DBUG_RETURN(0);
+  }
+
+  /* Two threads can't be here */
+  CRASH_IF_S3_TABLE(info->s);           /* S3 readonly doesn't come here */
+
+  saved_errno= my_errno;                /* Remember last error */
+
+#ifdef MARIA_EXTERNAL_LOCKING
+  /*
+    The following only makes sense if we want to allow two different
+    processes to access the same table at the same time
+  */
+  share->state.process= share->last_process= share->this_process;
+  share->state.unique= info->last_unique= info->this_unique;
+  share->state.update_count= info->last_loop= ++info->this_loop;
+#endif
+
+  write_result= _ma_state_info_write_sub(share->kfile.file,
+                                         &share->state,
+                                         MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET);
+  if (write_result)
+    saved_errno= my_errno;              /* Report the write error instead */
+#ifdef _WIN32
+  if (maria_flush)
+  {
+    _commit(share->kfile.file);
+    _commit(info->dfile.file);
+  }
+#endif
+  my_errno= saved_errno;
+  DBUG_RETURN(write_result);
+} /* _ma_writeinfo */
+
+
+/*
+  Test if an external process has changed the database
+  (Should be called after readinfo)
+
+  SYNOPSIS
+    _ma_test_if_changed()
+    info  Maria handler
+
+  NOTES
+    With MARIA_EXTERNAL_LOCKING the process/unique/update_count values of
+    the shared state are compared with the values remembered from the last
+    call. On a mismatch the remembered values are refreshed, the handler is
+    flagged (HA_STATE_WRITTEN, data_changed) and, if the change came from
+    another process, the page cache blocks of the index file are flushed
+    with FLUSH_RELEASE.
+
+  RETURN
+    1  State has changed, or HA_STATE_AKTIV is not set in info->update, or
+       one of HA_STATE_WRITTEN, HA_STATE_DELETED, HA_STATE_KEY_CHANGED is set
+    0  Otherwise
+*/
+
+int _ma_test_if_changed(register MARIA_HA *info)
+{
+#ifdef MARIA_EXTERNAL_LOCKING
+  MARIA_SHARE *share= info->s;
+  int state_is_current= (share->state.process == share->last_process &&
+                         share->state.unique == info->last_unique &&
+                         share->state.update_count == info->last_loop);
+
+  if (!state_is_current)
+  {                                     /* Keyfile has changed */
+    DBUG_PRINT("info",("index file changed"));
+    if (share->state.process != share->this_process)
+      VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
+                                  FLUSH_RELEASE));
+    share->last_process= share->state.process;
+    info->last_unique= share->state.unique;
+    info->last_loop= share->state.update_count;
+    info->update|= HA_STATE_WRITTEN;    /* Must use file on next */
+    info->data_changed= 1;              /* For maria_is_changed */
+    return 1;
+  }
+#endif
+  if (!(info->update & HA_STATE_AKTIV))
+    return 1;
+  return (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
+                          HA_STATE_KEY_CHANGED)) != 0;
+} /* _ma_test_if_changed */
+
+
+/*
+ Put a mark in the .MAI file that someone is updating the table
+
+ DOCUMENTATION
+ state.open_count in the .MAI file is used the following way:
+ - For the first change of the .MAI file in this process open_count is
+ incremented by _ma_mark_file_changed(). (We have a write lock on the file
+ when this happens)
+ - In maria_close() it's decremented by _ma_decrement_open_count() if it
+ was incremented in the same process.
+
+ This means that if we are the only process using the file, the open_count
+ tells us if the MARIA file wasn't properly closed. (This is true if
+ my_disable_locking is set).
+
+ open_count is not maintained on disk for temporary tables.
+*/
+
+/*
+  True if the table is already marked as changed, both in the state flags
+  (STATE_CHANGED) and in share->global_changed. Expects a variable named
+  'share' to be in scope.
+*/
+#define _MA_ALREADY_MARKED_FILE_CHANGED \
+ ((share->state.changed & STATE_CHANGED) && share->global_changed)
+
+/*
+  Mark that the table is being changed
+
+  SYNOPSIS
+    _ma_mark_file_changed()
+    share  Maria share
+
+  RETURN
+    0  ok (always for transactional tables, or if already marked)
+    #  result of _ma_mark_file_changed_now()
+*/
+
+int _ma_mark_file_changed(register MARIA_SHARE *share)
+{
+  const uint changed_flags= (STATE_CHANGED | STATE_NOT_ANALYZED |
+                             STATE_NOT_OPTIMIZED_KEYS);
+  int res;
+
+  if (share->base.born_transactional)
+  {
+    /*
+      For transactional tables, the table is marked changed when the first
+      page is written. Here we just mark the state to be updated so that
+      the caller can do 'analyze table' and find that it has changed before
+      any pages are written.
+    */
+    if (!test_all_bits(share->state.changed, changed_flags))
+    {
+      mysql_mutex_lock(&share->intern_lock);
+      share->state.changed|= changed_flags;
+      mysql_mutex_unlock(&share->intern_lock);
+    }
+    return 0;
+  }
+
+  if (_MA_ALREADY_MARKED_FILE_CHANGED)
+    return 0;
+
+  res= _ma_mark_file_changed_now(share);
+  /* Ensure that STATE_NOT_ANALYZED is set again on table changes */
+  share->state.changed|= changed_flags;
+  return res;
+}
+/*
+  Mark the table as changed in memory and in the index file
+
+  SYNOPSIS
+    _ma_mark_file_changed_now()
+    share  Maria share
+
+  NOTES
+    _MA_ALREADY_MARKED_FILE_CHANGED is tested first without
+    share->intern_lock and then again with the mutex held.
+    The first time the share is marked (global_changed not set),
+    state.open_count is incremented. For non temporary tables open_count
+    and a "changed" byte are then written to the index file.
+    For transactional tables without STATE_NOT_MOVABLE in state.org_changed
+    the uuid of the file is also set.
+
+  RETURN
+    0  ok (also if the table was already marked)
+    1  my_pwrite(), _ma_set_uuid() or _ma_update_state_lsns_sub() failed
+*/
+int _ma_mark_file_changed_now(register MARIA_SHARE *share)
+{
+ uchar buff[3]; /* 2 bytes of open_count + 1 byte "changed" marker */
+ int error= 1; /* Assume failure until all writes have succeeded */
+ DBUG_ENTER("_ma_mark_file_changed_now");
+
+ if (_MA_ALREADY_MARKED_FILE_CHANGED)
+ DBUG_RETURN(0);
+ mysql_mutex_lock(&share->intern_lock); /* recheck under mutex */
+ if (! _MA_ALREADY_MARKED_FILE_CHANGED)
+ {
+ share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
+ STATE_NOT_OPTIMIZED_KEYS);
+ if (!share->global_changed)
+ {
+ share->changed= share->global_changed= 1;
+ share->state.open_count++;
+ }
+ /*
+ Temp tables don't need an open_count as they are removed on crash.
+ In theory transactional tables are fixed by log-based recovery, so don't
+ need an open_count either, but if recovery has failed and logs have been
+ removed (by maria-force-start-after-recovery-failures), we still need to
+ detect dubious tables.
+ If we didn't maintain open_count on disk for a table, after a crash
+ we wouldn't know if it was closed at crash time (thus does not need a
+ check) or not. So we would have to check all tables: overkill.
+ */
+ if (!share->temporary)
+ {
+ CRASH_IF_S3_TABLE(share);
+ mi_int2store(buff,share->state.open_count);
+ buff[2]=1; /* Mark that it's changed */
+ if (my_pwrite(share->kfile.file, buff, sizeof(buff),
+ sizeof(share->state.header) +
+ MARIA_FILE_OPEN_COUNT_OFFSET,
+ MYF(MY_NABP)))
+ goto err;
+ }
+ /* Set uuid of file if not yet set (zerofilled file) */
+ if (share->base.born_transactional &&
+ !(share->state.org_changed & STATE_NOT_MOVABLE))
+ {
+ CRASH_IF_S3_TABLE(share);
+ /* Lock table to current installation */
+ if (_ma_set_uuid(share, 0) ||
+ (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
+ _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
+ trnman_get_min_trid(),
+ TRUE, TRUE)))
+ goto err;
+ share->state.changed|= STATE_NOT_MOVABLE;
+ share->state.org_changed|= STATE_NOT_MOVABLE;
+ }
+ }
+ error= 0;
+err: /* Both the success and the failure path release the mutex here */
+ mysql_mutex_unlock(&share->intern_lock);
+ DBUG_RETURN(error);
+#undef _MA_ALREADY_MARKED_FILE_CHANGED
+}
+
+/*
+  Check that a region is all zero
+
+  SYNOPSIS
+    _ma_check_if_zero()
+    pos     Start of memory to check
+    length  Length of memory region in bytes
+
+  NOTES
+    Used mainly to detect rows with wrong extent information
+
+  RETURN
+    0  All bytes are zero (also for an empty region)
+    1  At least one byte is not zero
+*/
+
+my_bool _ma_check_if_zero(uchar *pos, size_t length)
+{
+  size_t offset;
+  for (offset= 0; offset < length; offset++)
+  {
+    if (pos[offset])
+      return 1;
+  }
+  return 0;
+}
+
+/*
+ Decrement state.open_count again if this share has incremented it
+ (share->global_changed is set).
+
+ This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
+ call. In these contexts the following code should be safe!
+
+ If lock_tables is set and my_disable_locking is not, the table is write
+ locked with maria_lock_database() during the update and the old lock type
+ is restored afterwards.
+
+ Returns 0 if ok, 1 if locking or writing the new open_count failed.
+ */
+
+int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables)
+{
+ uchar buff[2]; /* open_count in the 2 byte format written to the file */
+ register MARIA_SHARE *share= info->s;
+ int lock_error=0,write_error=0;
+ DBUG_ENTER("_ma_decrement_open_count");
+
+ if (share->global_changed)
+ {
+ uint old_lock=info->lock_type; /* Lock type to restore at the end */
+ share->global_changed=0;
+ lock_error= (my_disable_locking || ! lock_tables ? 0 :
+ maria_lock_database(info, F_WRLCK));
+ /* Its not fatal even if we couldn't get the lock ! */
+ if (share->state.open_count > 0)
+ {
+ CRASH_IF_S3_TABLE(share);
+ share->state.open_count--;
+ share->changed= 1; /* We have to update state */
+ /*
+ For temporary tables that will just be deleted, we don't have
+ to decrement state. For transactional tables the state will be
+ updated in maria_close().
+ */
+
+ if (!share->temporary && !share->now_transactional)
+ {
+ mi_int2store(buff,share->state.open_count);
+ write_error= (int) my_pwrite(share->kfile.file, buff, sizeof(buff),
+ sizeof(share->state.header) +
+ MARIA_FILE_OPEN_COUNT_OFFSET,
+ MYF(MY_NABP));
+ }
+ }
+ if (!lock_error && !my_disable_locking && lock_tables)
+ lock_error=maria_lock_database(info,old_lock); /* Restore old lock */
+ }
+ DBUG_RETURN(MY_TEST(lock_error || write_error));
+}
+
+
+/**
+  @brief Mark file as crashed
+
+  Sets STATE_CRASHED in share->state.changed and, unless
+  share->no_status_updates is set, writes the changed flags to the index
+  file.
+
+  @param share  Maria share
+*/
+
+void _ma_mark_file_crashed(MARIA_SHARE *share)
+{
+  uchar changed_bytes[2];
+  DBUG_ENTER("_ma_mark_file_crashed");
+
+  share->state.changed|= STATE_CRASHED;
+  if (!share->no_status_updates)        /* Safety */
+  {
+    mi_int2store(changed_bytes, share->state.changed);
+
+    /*
+      We can ignore the errors, as if the mark failed, there isn't anything
+      else we can do; The user should already have got an error that the
+      table was crashed.
+    */
+    (void) my_pwrite(share->kfile.file, changed_bytes,
+                     sizeof(changed_bytes),
+                     sizeof(share->state.header) +
+                     MARIA_FILE_CHANGED_OFFSET,
+                     MYF(MY_NABP));
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/**
+  @brief Set the uuid of a Maria file
+
+  The uuid is written to the index file at the position stored in
+  share->state.header.base_pos.
+
+  @fn _ma_set_uuid()
+  @param share       Maria share
+  @param reset_uuid  Instead of setting file to maria_uuid, set it to
+                     0 to mark it as movable
+
+  @return Result of my_pwrite() with MY_NABP, cast to my_bool
+    @retval 0  ok
+    @retval #  write failed
+*/
+
+my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid)
+{
+  uchar zero_uuid[MY_UUID_SIZE];
+  uchar *source= maria_uuid;
+
+  if (reset_uuid)
+  {
+    bzero(zero_uuid, sizeof(zero_uuid));
+    source= zero_uuid;
+  }
+  CRASH_IF_S3_TABLE(share);
+  return (my_bool) my_pwrite(share->kfile.file, source, MY_UUID_SIZE,
+                             mi_uint2korr(share->state.header.base_pos),
+                             MYF(MY_NABP));
+}