summaryrefslogtreecommitdiffstats
path: root/storage/maria/ma_dynrec.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/maria/ma_dynrec.c')
-rw-r--r--storage/maria/ma_dynrec.c2109
1 files changed, 2109 insertions, 0 deletions
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
new file mode 100644
index 00000000..33f238d9
--- /dev/null
+++ b/storage/maria/ma_dynrec.c
@@ -0,0 +1,2109 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/*
+ Functions to handle space-packed-records and blobs
+
+ A row may be stored in one or more linked blocks.
+ The block size is between MARIA_MIN_BLOCK_LENGTH and MARIA_MAX_BLOCK_LENGTH.
+ Each block is aligned on MARIA_DYN_ALIGN_SIZE.
+ The reson for the max block size is to not have too many different types
+ of blocks. For the differnet block types, look at _ma_get_block_info()
+*/
+
+#include "maria_def.h"
+
+static my_bool write_dynamic_record(MARIA_HA *info,const uchar *record,
+ ulong reclength);
+static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos,
+ ulong *length);
+static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uchar *record, ulong reclength);
+static my_bool delete_dynamic_record(MARIA_HA *info,MARIA_RECORD_POS filepos,
+ uint second_read);
+static my_bool _ma_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
+ uint length);
+
+ /* Interface function from MARIA_HA */
+
+#ifdef HAVE_MMAP
+
+/*
+ Create mmaped area for MARIA handler
+
+ SYNOPSIS
+ _ma_dynmap_file()
+ info MARIA handler
+
+ RETURN
+ 0 ok
+ 1 error.
+*/
+
+my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size)
+{
+ DBUG_ENTER("_ma_dynmap_file");
+ if (size > (my_off_t) (~((size_t) 0)) - MEMMAP_EXTRA_MARGIN)
+ {
+ DBUG_PRINT("warning", ("File is too large for mmap"));
+ DBUG_RETURN(1);
+ }
+ /*
+ Ingo wonders if it is good to use MAP_NORESERVE. From the Linux man page:
+ MAP_NORESERVE
+ Do not reserve swap space for this mapping. When swap space is
+ reserved, one has the guarantee that it is possible to modify the
+ mapping. When swap space is not reserved one might get SIGSEGV
+ upon a write if no physical memory is available.
+ */
+ info->s->file_map= (uchar*)
+ my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN),
+ info->s->mode==O_RDONLY ? PROT_READ :
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_NORESERVE,
+ info->dfile.file, 0L);
+ if (info->s->file_map == (uchar*) MAP_FAILED)
+ {
+ info->s->file_map= NULL;
+ DBUG_RETURN(1);
+ }
+#if defined(HAVE_MADVISE)
+ madvise((char*) info->s->file_map, size, MADV_RANDOM);
+#endif
+ info->s->mmaped_length= size;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Resize mmaped area for MARIA handler
+
+ SYNOPSIS
+ _ma_remap_file()
+ info MARIA handler
+
+ RETURN
+*/
+
+void _ma_remap_file(MARIA_HA *info, my_off_t size)
+{
+ if (info->s->file_map)
+ {
+ my_munmap((char*) info->s->file_map,
+ (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN);
+ _ma_dynmap_file(info, size);
+ }
+}
+#endif
+
+
+/*
+ Read bytes from MySAM handler, using mmap or pread
+
+ SYNOPSIS
+ _ma_mmap_pread()
+ info MARIA handler
+ Buffer Input buffer
+ Count Count of bytes for read
+ offset Start position
+ MyFlags
+
+ RETURN
+ 0 ok
+*/
+
+size_t _ma_mmap_pread(MARIA_HA *info, uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ DBUG_PRINT("info", ("maria_read with mmap %d\n", info->dfile.file));
+ if (info->s->lock_key_trees)
+ mysql_rwlock_rdlock(&info->s->mmap_lock);
+
+ /*
+ The following test may fail in the following cases:
+ - We failed to remap a memory area (fragmented memory?)
+ - This thread has done some writes, but not yet extended the
+ memory mapped area.
+ */
+
+ if (info->s->mmaped_length >= offset + Count)
+ {
+ memcpy(Buffer, info->s->file_map + offset, Count);
+ if (info->s->lock_key_trees)
+ mysql_rwlock_unlock(&info->s->mmap_lock);
+ return 0;
+ }
+ else
+ {
+ if (info->s->lock_key_trees)
+ mysql_rwlock_unlock(&info->s->mmap_lock);
+ return mysql_file_pread(info->dfile.file, Buffer, Count, offset, MyFlags);
+ }
+}
+
+
+ /* wrapper for my_pread in case if mmap isn't used */
+
+size_t _ma_nommap_pread(MARIA_HA *info, uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ return mysql_file_pread(info->dfile.file, Buffer, Count, offset, MyFlags);
+}
+
+
+/*
+ Write bytes to MySAM handler, using mmap or pwrite
+
+ SYNOPSIS
+ _ma_mmap_pwrite()
+ info MARIA handler
+ Buffer Output buffer
+ Count Count of bytes for write
+ offset Start position
+ MyFlags
+
+ RETURN
+ 0 ok
+ !=0 error. In this case return error from pwrite
+*/
+
+size_t _ma_mmap_pwrite(MARIA_HA *info, const uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ DBUG_PRINT("info", ("maria_write with mmap %d\n", info->dfile.file));
+ if (info->s->lock_key_trees)
+ mysql_rwlock_rdlock(&info->s->mmap_lock);
+
+ /*
+ The following test may fail in the following cases:
+ - We failed to remap a memory area (fragmented memory?)
+ - This thread has done some writes, but not yet extended the
+ memory mapped area.
+ */
+
+ if (info->s->mmaped_length >= offset + Count)
+ {
+ memcpy(info->s->file_map + offset, Buffer, Count);
+ if (info->s->lock_key_trees)
+ mysql_rwlock_unlock(&info->s->mmap_lock);
+ return 0;
+ }
+ else
+ {
+ info->s->nonmmaped_inserts++;
+ if (info->s->lock_key_trees)
+ mysql_rwlock_unlock(&info->s->mmap_lock);
+ return my_pwrite(info->dfile.file, Buffer, Count, offset, MyFlags);
+ }
+
+}
+
+
+ /* wrapper for my_pwrite in case if mmap isn't used */
+
+size_t _ma_nommap_pwrite(MARIA_HA *info, const uchar *Buffer,
+ size_t Count, my_off_t offset, myf MyFlags)
+{
+ return my_pwrite(info->dfile.file, Buffer, Count, offset, MyFlags);
+}
+
+
+my_bool _ma_write_dynamic_record(MARIA_HA *info, const uchar *record)
+{
+ ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ record);
+ if (!reclength)
+ return 1;
+ return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ reclength));
+}
+
+my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec __attribute__ ((unused)),
+ const uchar *record)
+{
+ uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ record);
+ if (!length)
+ return 1;
+ return (update_dynamic_record(info, pos,
+ info->rec_buff + MARIA_REC_BUFF_OFFSET,
+ length));
+}
+
+
+my_bool _ma_write_blob_record(MARIA_HA *info, const uchar *record)
+{
+ uchar *rec_buff;
+ int error;
+ ulong reclength,reclength2,extra;
+ my_bool buff_alloced;
+
+ extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER+1);
+ reclength= (info->s->base.pack_reclength +
+ _ma_calc_total_blob_length(info,record)+ extra);
+
+ alloc_on_stack(*info->stack_end_ptr, rec_buff, buff_alloced, reclength);
+ if (!rec_buff)
+ {
+ my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */
+ return(1);
+ }
+
+ reclength2= _ma_rec_pack(info,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ record);
+ if (!reclength2)
+ {
+ error= 1;
+ goto err;
+ }
+
+ DBUG_PRINT("info",("reclength: %lu reclength2: %lu",
+ reclength, reclength2));
+ DBUG_ASSERT(reclength2 <= reclength);
+ error= write_dynamic_record(info,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ reclength2);
+err:
+ stack_alloc_free(rec_buff, buff_alloced);
+ return(error != 0);
+}
+
+
+my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos,
+ const uchar *oldrec __attribute__ ((unused)),
+ const uchar *record)
+{
+ uchar *rec_buff;
+ int error;
+ ulong reclength,reclength2,extra;
+ my_bool buff_alloced;
+
+ extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
+ MARIA_DYN_DELETE_BLOCK_HEADER);
+ reclength= (info->s->base.pack_reclength+
+ _ma_calc_total_blob_length(info,record)+ extra);
+#ifdef NOT_USED /* We now support big rows */
+ if (reclength > MARIA_DYN_MAX_ROW_LENGTH)
+ {
+ my_errno=HA_ERR_TO_BIG_ROW;
+ return 1;
+ }
+#endif
+
+ alloc_on_stack(*info->stack_end_ptr, rec_buff, buff_alloced, reclength);
+ if (!rec_buff)
+ {
+ my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */
+ return(1);
+ }
+
+ reclength2= _ma_rec_pack(info, rec_buff+
+ ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ record);
+ if (!reclength2)
+ {
+ error= 1;
+ goto err;
+ }
+ DBUG_ASSERT(reclength2 <= reclength);
+ error=update_dynamic_record(info,pos,
+ rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER),
+ reclength2);
+err:
+ stack_alloc_free(rec_buff, buff_alloced);
+ return(error != 0);
+}
+
+
+my_bool _ma_delete_dynamic_record(MARIA_HA *info,
+ const uchar *record __attribute__ ((unused)))
+{
+ return delete_dynamic_record(info, info->cur_row.lastpos, 0);
+}
+
+
+/**
+ Write record to data-file.
+
+ @todo it's cheating: it casts "const uchar*" to uchar*.
+*/
+
+static my_bool write_dynamic_record(MARIA_HA *info, const uchar *record,
+ ulong reclength)
+{
+ int flag;
+ ulong length;
+ my_off_t filepos;
+ DBUG_ENTER("write_dynamic_record");
+
+ flag=0;
+
+ /*
+ Check if we have enough room for the new record.
+ First we do simplified check to make usual case faster.
+ Then we do more precise check for the space left.
+ Though it still is not absolutely precise, as
+ we always use MARIA_MAX_DYN_BLOCK_HEADER while it can be
+ less in the most of the cases.
+ */
+
+ if (unlikely(info->s->base.max_data_file_length -
+ info->state->data_file_length <
+ reclength + MARIA_MAX_DYN_BLOCK_HEADER))
+ {
+ if (info->s->base.max_data_file_length - info->state->data_file_length +
+ info->state->empty - info->state->del * MARIA_MAX_DYN_BLOCK_HEADER <
+ reclength + MARIA_MAX_DYN_BLOCK_HEADER)
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ DBUG_RETURN(1);
+ }
+ }
+
+ do
+ {
+ if (_ma_find_writepos(info,reclength,&filepos,&length))
+ goto err;
+ if (_ma_write_part_record(info,filepos,length,
+ (info->append_insert_at_end ?
+ HA_OFFSET_ERROR : info->s->state.dellink),
+ (uchar**) &record,&reclength,&flag))
+ goto err;
+ } while (reclength);
+
+ DBUG_RETURN(0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+ /* Get a block for data ; The given data-area must be used !! */
+
+static int _ma_find_writepos(MARIA_HA *info,
+ ulong reclength, /* record length */
+ my_off_t *filepos, /* Return file pos */
+ ulong *length) /* length of block at filepos */
+{
+ MARIA_BLOCK_INFO block_info;
+ ulong tmp;
+ DBUG_ENTER("_ma_find_writepos");
+
+ if (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end)
+ {
+ /* Deleted blocks exists; Get last used block */
+ *filepos=info->s->state.dellink;
+ block_info.second_read=0;
+ info->rec_cache.seek_not_done=1;
+ if (!(_ma_get_block_info(info, &block_info, info->dfile.file,
+ info->s->state.dellink) &
+ BLOCK_DELETED))
+ {
+ DBUG_PRINT("error",("Delete link crashed"));
+ _ma_set_fatal_error_with_share(info->s, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(-1);
+ }
+ info->s->state.dellink=block_info.next_filepos;
+ info->state->del--;
+ info->state->empty-= block_info.block_len;
+ *length= block_info.block_len;
+ }
+ else
+ {
+ /* No deleted blocks; Allocate a new block */
+ *filepos=info->state->data_file_length;
+ if ((tmp= reclength + 3 + MY_TEST(reclength >= (65520 - 3))) <
+ info->s->base.min_block_length)
+ tmp= info->s->base.min_block_length;
+ else
+ tmp= ((tmp+MARIA_DYN_ALIGN_SIZE-1) &
+ (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1)));
+ if (info->state->data_file_length >
+ (info->s->base.max_data_file_length - tmp))
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ DBUG_RETURN(-1);
+ }
+ if (tmp > MARIA_MAX_BLOCK_LENGTH)
+ tmp=MARIA_MAX_BLOCK_LENGTH;
+ *length= tmp;
+ info->state->data_file_length+= tmp;
+ info->s->state.split++;
+ info->update|=HA_STATE_WRITE_AT_END;
+ }
+ DBUG_RETURN(0);
+} /* _ma_find_writepos */
+
+
+
+/*
+ Unlink a deleted block from the deleted list.
+ This block will be combined with the preceding or next block to form
+ a big block.
+*/
+
+static my_bool unlink_deleted_block(MARIA_HA *info,
+ MARIA_BLOCK_INFO *block_info)
+{
+ DBUG_ENTER("unlink_deleted_block");
+ if (block_info->filepos == info->s->state.dellink)
+ {
+ /* First deleted block; We can just use this ! */
+ info->s->state.dellink=block_info->next_filepos;
+ }
+ else
+ {
+ MARIA_BLOCK_INFO tmp;
+ tmp.second_read=0;
+ /* Unlink block from the previous block */
+ if (!(_ma_get_block_info(info, &tmp, info->dfile.file,
+ block_info->prev_filepos)
+ & BLOCK_DELETED))
+ DBUG_RETURN(1); /* Something is wrong */
+ mi_sizestore(tmp.header+4,block_info->next_filepos);
+ if (info->s->file_write(info, tmp.header+4,8,
+ block_info->prev_filepos+4, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ /* Unlink block from next block */
+ if (block_info->next_filepos != HA_OFFSET_ERROR)
+ {
+ if (!(_ma_get_block_info(info, &tmp, info->dfile.file,
+ block_info->next_filepos)
+ & BLOCK_DELETED))
+ DBUG_RETURN(1); /* Something is wrong */
+ mi_sizestore(tmp.header+12,block_info->prev_filepos);
+ if (info->s->file_write(info, tmp.header+12,8,
+ block_info->next_filepos+12,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ }
+ }
+ /* We now have one less deleted block */
+ info->state->del--;
+ info->state->empty-= block_info->block_len;
+ info->s->state.split--;
+
+ /*
+ If this was a block that we where accessing through table scan
+ (maria_rrnd() or maria_scan(), then ensure that we skip over this block
+ when doing next maria_rrnd() or maria_scan().
+ */
+ if (info->cur_row.nextpos == block_info->filepos)
+ info->cur_row.nextpos+= block_info->block_len;
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Add a backward link to delete block
+
+ SYNOPSIS
+ update_backward_delete_link()
+ info MARIA handler
+ delete_block Position to delete block to update.
+ If this is 'HA_OFFSET_ERROR', nothing will be done
+ filepos Position to block that 'delete_block' should point to
+
+ RETURN
+ 0 ok
+ 1 error. In this case my_error is set.
+*/
+
+static my_bool update_backward_delete_link(MARIA_HA *info,
+ my_off_t delete_block,
+ MARIA_RECORD_POS filepos)
+{
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("update_backward_delete_link");
+
+ if (delete_block != HA_OFFSET_ERROR)
+ {
+ block_info.second_read=0;
+ if (_ma_get_block_info(info, &block_info, info->dfile.file, delete_block)
+ & BLOCK_DELETED)
+ {
+ uchar buff[8];
+ mi_sizestore(buff,filepos);
+ if (info->s->file_write(info,buff, 8, delete_block+12, MYF(MY_NABP)))
+ DBUG_RETURN(1); /* Error on write */
+ }
+ else
+ {
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(1); /* Wrong delete link */
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+/* Delete datarecord from database */
+/* info->rec_cache.seek_not_done is updated in cmp_record */
+
+static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uint second_read)
+{
+ uint length,b_type;
+ MARIA_BLOCK_INFO block_info,del_block;
+ int error;
+ my_bool remove_next_block;
+ DBUG_ENTER("delete_dynamic_record");
+
+ /* First add a link from the last block to the new one */
+ error= update_backward_delete_link(info, info->s->state.dellink, filepos);
+
+ block_info.second_read=second_read;
+ do
+ {
+ /* Remove block at 'filepos' */
+ if ((b_type= _ma_get_block_info(info, &block_info, info->dfile.file,
+ filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR) ||
+ (length=(uint) (block_info.filepos-filepos) +block_info.block_len) <
+ MARIA_MIN_BLOCK_LENGTH)
+ {
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ DBUG_RETURN(1);
+ }
+ /* Check if next block is a delete block */
+ del_block.second_read=0;
+ remove_next_block=0;
+ if (_ma_get_block_info(info, &del_block, info->dfile.file,
+ filepos + length) &
+ BLOCK_DELETED && del_block.block_len+length <
+ MARIA_DYN_MAX_BLOCK_LENGTH)
+ {
+ /* We can't remove this yet as this block may be the head block */
+ remove_next_block=1;
+ length+=del_block.block_len;
+ }
+
+ block_info.header[0]=0;
+ mi_int3store(block_info.header+1,length);
+ mi_sizestore(block_info.header+4,info->s->state.dellink);
+ if (b_type & BLOCK_LAST)
+ bfill(block_info.header+12,8,255);
+ else
+ mi_sizestore(block_info.header+12,block_info.next_filepos);
+ if (info->s->file_write(info, block_info.header, 20, filepos,
+ MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ info->s->state.dellink = filepos;
+ info->state->del++;
+ info->state->empty+=length;
+ filepos=block_info.next_filepos;
+
+ /* Now it's safe to unlink the deleted block directly after this one */
+ if (remove_next_block && unlink_deleted_block(info,&del_block))
+ error=1;
+ } while (!(b_type & BLOCK_LAST));
+
+ DBUG_RETURN(error);
+}
+
+
+ /* Write a block to datafile */
+
+int _ma_write_part_record(MARIA_HA *info,
+ my_off_t filepos, /* points at empty block */
+ ulong length, /* length of block */
+ my_off_t next_filepos,/* Next empty block */
+ uchar **record, /* pointer to record ptr */
+ ulong *reclength, /* length of *record */
+ int *flag) /* *flag == 0 if header */
+{
+ ulong head_length,res_length,extra_length,long_block,del_length;
+ uchar *pos,*record_end;
+ my_off_t next_delete_block;
+ uchar temp[MARIA_SPLIT_LENGTH+MARIA_DYN_DELETE_BLOCK_HEADER];
+ DBUG_ENTER("_ma_write_part_record");
+
+ next_delete_block=HA_OFFSET_ERROR;
+
+ res_length=extra_length=0;
+ if (length > *reclength + MARIA_SPLIT_LENGTH)
+ { /* Splitt big block */
+ res_length=MY_ALIGN(length- *reclength - MARIA_EXTEND_BLOCK_LENGTH,
+ MARIA_DYN_ALIGN_SIZE);
+ length-= res_length; /* Use this for first part */
+ }
+ long_block= (length < 65520L && *reclength < 65520L) ? 0 : 1;
+ if (length == *reclength+ 3 + long_block)
+ {
+ /* Block is exactly of the right length */
+ temp[0]=(uchar) (1+ *flag)+(uchar) long_block; /* Flag is 0 or 6 */
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ head_length=4;
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ head_length=3;
+ }
+ }
+ else if (length-long_block < *reclength+4)
+ { /* To short block */
+ if (next_filepos == HA_OFFSET_ERROR)
+ next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR &&
+ !info->append_insert_at_end ?
+ info->s->state.dellink : info->state->data_file_length);
+ if (*flag == 0) /* First block */
+ {
+ if (*reclength > MARIA_MAX_BLOCK_LENGTH)
+ {
+ head_length= 16;
+ temp[0]=13;
+ mi_int4store(temp+1,*reclength);
+ mi_int3store(temp+5,length-head_length);
+ mi_sizestore(temp+8,next_filepos);
+ }
+ else
+ {
+ head_length=5+8+long_block*2;
+ temp[0]=5+(uchar) long_block;
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ mi_int3store(temp+4,length-head_length);
+ mi_sizestore(temp+7,next_filepos);
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ mi_int2store(temp+3,length-head_length);
+ mi_sizestore(temp+5,next_filepos);
+ }
+ }
+ }
+ else
+ {
+ head_length=3+8+long_block;
+ temp[0]=11+(uchar) long_block;
+ if (long_block)
+ {
+ mi_int3store(temp+1,length-head_length);
+ mi_sizestore(temp+4,next_filepos);
+ }
+ else
+ {
+ mi_int2store(temp+1,length-head_length);
+ mi_sizestore(temp+3,next_filepos);
+ }
+ }
+ }
+ else
+ { /* Block with empty info last */
+ head_length=4+long_block;
+ extra_length= length- *reclength-head_length;
+ temp[0]= (uchar) (3+ *flag)+(uchar) long_block; /* 3,4 or 9,10 */
+ if (long_block)
+ {
+ mi_int3store(temp+1,*reclength);
+ temp[4]= (uchar) (extra_length);
+ }
+ else
+ {
+ mi_int2store(temp+1,*reclength);
+ temp[3]= (uchar) (extra_length);
+ }
+ length= *reclength+head_length; /* Write only what is needed */
+ }
+ DBUG_DUMP("header", temp, head_length);
+
+ /* Make a long block for one write */
+ record_end= *record+length-head_length;
+ del_length=(res_length ? MARIA_DYN_DELETE_BLOCK_HEADER : 0);
+ bmove((*record-head_length), temp, head_length);
+ memcpy(temp,record_end,(size_t) (extra_length+del_length));
+ bzero(record_end, extra_length);
+
+ if (res_length)
+ {
+ /* Check first if we can join this block with the next one */
+ MARIA_BLOCK_INFO del_block;
+ my_off_t next_block=filepos+length+extra_length+res_length;
+
+ del_block.second_read=0;
+ if (next_block < info->state->data_file_length &&
+ info->s->state.dellink != HA_OFFSET_ERROR)
+ {
+ if ((_ma_get_block_info(info, &del_block, info->dfile.file, next_block)
+ & BLOCK_DELETED) &&
+ res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH)
+ {
+ if (unlink_deleted_block(info,&del_block))
+ goto err;
+ res_length+=del_block.block_len;
+ }
+ }
+
+ /* Create a delete link of the last part of the block */
+ pos=record_end+extra_length;
+ pos[0]= '\0';
+ mi_int3store(pos+1,res_length);
+ mi_sizestore(pos+4,info->s->state.dellink);
+ bfill(pos+12,8,255); /* End link */
+ next_delete_block=info->s->state.dellink;
+ info->s->state.dellink= filepos+length+extra_length;
+ info->state->del++;
+ info->state->empty+=res_length;
+ info->s->state.split++;
+ }
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->update & HA_STATE_WRITE_AT_END)
+ {
+ if (info->update & HA_STATE_EXTEND_BLOCK)
+ {
+ info->update&= ~HA_STATE_EXTEND_BLOCK;
+ if (my_block_write(&info->rec_cache, *record-head_length,
+ length+extra_length+del_length,filepos))
+ goto err;
+ }
+ else if (my_b_write(&info->rec_cache, *record-head_length,
+ length+extra_length+del_length))
+ goto err;
+ }
+ else
+ {
+ info->rec_cache.seek_not_done=1;
+ if (info->s->file_write(info, *record-head_length,
+ length+extra_length+
+ del_length,filepos,info->s->write_flag))
+ goto err;
+ }
+ memcpy(record_end,temp,(size_t) (extra_length+del_length));
+ *record=record_end;
+ *reclength-=(length-head_length);
+ *flag=6;
+
+ if (del_length)
+ {
+ /* link the next delete block to this */
+ if (update_backward_delete_link(info, next_delete_block,
+ info->s->state.dellink))
+ goto err;
+ }
+
+ DBUG_RETURN(0);
+err:
+ DBUG_PRINT("exit",("errno: %d",my_errno));
+ DBUG_RETURN(1);
+} /* _ma_write_part_record */
+
+
+ /* update record from datafile */
+
+static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
+ uchar *record, ulong reclength)
+{
+ int flag;
+ uint error;
+ ulong length;
+ MARIA_BLOCK_INFO block_info;
+ DBUG_ENTER("update_dynamic_record");
+
+ flag=block_info.second_read=0;
+ /*
+ Check if we have enough room for the record.
+ First we do simplified check to make usual case faster.
+ Then we do more precise check for the space left.
+ Though it still is not absolutely precise, as
+ we always use MARIA_MAX_DYN_BLOCK_HEADER while it can be
+ less in the most of the cases.
+ */
+
+ /*
+ compare with just the reclength as we're going
+ to get some space from the old replaced record
+ */
+ if (unlikely(info->s->base.max_data_file_length -
+ info->state->data_file_length < reclength))
+ {
+ /* If new record isn't longer, we can go on safely */
+ if (info->cur_row.total_length < reclength)
+ {
+ if (info->s->base.max_data_file_length - info->state->data_file_length +
+ info->state->empty - info->state->del * MARIA_MAX_DYN_BLOCK_HEADER <
+ reclength - info->cur_row.total_length + MARIA_MAX_DYN_BLOCK_HEADER)
+ {
+ my_errno=HA_ERR_RECORD_FILE_FULL;
+ goto err;
+ }
+ }
+ }
+ /* Remember length for updated row if it's updated again */
+ info->cur_row.total_length= reclength;
+
+ while (reclength > 0)
+ {
+ if (filepos != info->s->state.dellink)
+ {
+ block_info.next_filepos= HA_OFFSET_ERROR;
+ if ((error= _ma_get_block_info(info, &block_info, info->dfile.file,
+ filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ DBUG_PRINT("error",("Got wrong block info"));
+ if (!(error & BLOCK_FATAL_ERROR))
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ goto err;
+ }
+ length=(ulong) (block_info.filepos-filepos) + block_info.block_len;
+ if (length < reclength)
+ {
+ uint tmp=MY_ALIGN(reclength - length + 3 +
+ MY_TEST(reclength >= 65520L), MARIA_DYN_ALIGN_SIZE);
+ /* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */
+ tmp= MY_MIN(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
+ /* Check if we can extend this block */
+ if (block_info.filepos + block_info.block_len ==
+ info->state->data_file_length &&
+ info->state->data_file_length <
+ info->s->base.max_data_file_length-tmp)
+ {
+ /* extend file */
+ DBUG_PRINT("info",("Extending file with %d bytes",tmp));
+ if (info->cur_row.nextpos == info->state->data_file_length)
+ info->cur_row.nextpos+= tmp;
+ info->state->data_file_length+= tmp;
+ info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK;
+ length+=tmp;
+ }
+ else if (length < MARIA_MAX_BLOCK_LENGTH - MARIA_MIN_BLOCK_LENGTH)
+ {
+ /*
+ Check if next block is a deleted block
+ Above we have MARIA_MIN_BLOCK_LENGTH to avoid the problem where
+ the next block is so small it can't be splited which could
+ casue problems
+ */
+
+ MARIA_BLOCK_INFO del_block;
+ del_block.second_read=0;
+ if (_ma_get_block_info(info, &del_block, info->dfile.file,
+ block_info.filepos + block_info.block_len) &
+ BLOCK_DELETED)
+ {
+ /* Use; Unlink it and extend the current block */
+ DBUG_PRINT("info",("Extending current block"));
+ if (unlink_deleted_block(info,&del_block))
+ goto err;
+ if ((length+=del_block.block_len) > MARIA_MAX_BLOCK_LENGTH)
+ {
+ /*
+ New block was too big, link overflow part back to
+ delete list
+ */
+ my_off_t next_pos;
+ ulong rest_length= length-MARIA_MAX_BLOCK_LENGTH;
+ set_if_bigger(rest_length, MARIA_MIN_BLOCK_LENGTH);
+ next_pos= del_block.filepos+ del_block.block_len - rest_length;
+
+ if (update_backward_delete_link(info, info->s->state.dellink,
+ next_pos))
+ DBUG_RETURN(1);
+
+ /* create delete link for data that didn't fit into the page */
+ del_block.header[0]=0;
+ mi_int3store(del_block.header+1, rest_length);
+ mi_sizestore(del_block.header+4,info->s->state.dellink);
+ bfill(del_block.header+12,8,255);
+ if (info->s->file_write(info, del_block.header, 20,
+ next_pos, MYF(MY_NABP)))
+ DBUG_RETURN(1);
+ info->s->state.dellink= next_pos;
+ info->s->state.split++;
+ info->state->del++;
+ info->state->empty+= rest_length;
+ length-= rest_length;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ if (_ma_find_writepos(info,reclength,&filepos,&length))
+ goto err;
+ }
+ if (_ma_write_part_record(info,filepos,length,block_info.next_filepos,
+ &record,&reclength,&flag))
+ goto err;
+ if ((filepos=block_info.next_filepos) == HA_OFFSET_ERROR)
+ {
+ /* Start writing data on deleted blocks */
+ filepos=info->s->state.dellink;
+ }
+ }
+
+ if (block_info.next_filepos != HA_OFFSET_ERROR)
+ if (delete_dynamic_record(info,block_info.next_filepos,1))
+ goto err;
+
+ DBUG_RETURN(0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+/**
+ Pack a record.
+
+ @return new reclength
+ @return 0 in case of wrong data in record
+*/
+
+uint _ma_rec_pack(MARIA_HA *info, register uchar *to,
+ register const uchar *from)
+{
+ uint length,new_length,flag,bit,i;
+ const uchar *pos,*end;
+ uchar *startpos,*packpos;
+ enum en_fieldtype type;
+ reg3 MARIA_COLUMNDEF *column;
+ MARIA_BLOB *blob;
+ DBUG_ENTER("_ma_rec_pack");
+
+ flag= 0;
+ bit= 1;
+ startpos= packpos=to;
+ to+= info->s->base.pack_bytes;
+ blob= info->blobs;
+ column= info->s->columndef;
+ if (info->s->base.null_bytes)
+ {
+ memcpy(to, from, info->s->base.null_bytes);
+ from+= info->s->base.null_bytes;
+ to+= info->s->base.null_bytes;
+ }
+
+ for (i=info->s->base.fields ; i-- > 0; from+= length, column++)
+ {
+ length=(uint) column->length;
+ if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL)
+ {
+ if (type == FIELD_BLOB)
+ {
+ if (!blob->length)
+ flag|=bit;
+ else
+ {
+ char *temp_pos;
+ size_t tmp_length=length-portable_sizeof_char_ptr;
+ memcpy(to,from,tmp_length);
+ memcpy(&temp_pos,from+tmp_length,sizeof(char*));
+ memcpy(to+tmp_length,temp_pos,(size_t) blob->length);
+ to+=tmp_length+blob->length;
+ }
+ blob++;
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ {
+ if (memcmp(from, maria_zero_string, length) == 0)
+ flag|=bit;
+ else
+ {
+ memcpy(to, from, (size_t) length);
+ to+=length;
+ }
+ }
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ pos= from; end= from + length;
+ if (type == FIELD_SKIP_ENDSPACE)
+ { /* Pack trailing spaces */
+ while (end > from && *(end-1) == ' ')
+ end--;
+ }
+ else
+ { /* Pack pref-spaces */
+ while (pos < end && *pos == ' ')
+ pos++;
+ }
+ new_length=(uint) (end-pos);
+ if (new_length + 1 + MY_TEST(column->length > 255 && new_length > 127)
+ < length)
+ {
+ if (column->length > 255 && new_length > 127)
+ {
+ to[0]= (uchar) ((new_length & 127) + 128);
+ to[1]= (uchar) (new_length >> 7);
+ to+=2;
+ }
+ else
+ *to++= (uchar) new_length;
+ memcpy(to, pos, (size_t) new_length); to+=new_length;
+ flag|=bit;
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ }
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *from;
+ *to++= *from;
+ }
+ else
+ {
+ tmp_length= uint2korr(from);
+ store_key_length_inc(to,tmp_length);
+ }
+ if (tmp_length > column->length)
+ {
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ DBUG_RETURN(0);
+ }
+ memcpy(to, from+pack_length,tmp_length);
+ to+= tmp_length;
+ continue;
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ continue; /* Normal field */
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ *packpos++ = (uchar) flag;
+ bit=1; flag=0;
+ }
+ }
+ else
+ {
+ memcpy(to,from,(size_t) length); to+=length;
+ }
+ }
+ if (bit != 1)
+ *packpos= (uchar) flag;
+ if (info->s->calc_checksum)
+ *to++= (uchar) info->cur_row.checksum;
+ DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos)));
+ DBUG_RETURN((uint) (to-startpos));
+} /* _ma_rec_pack */
+
+
+
+/*
+ Check if a record was correctly packed. Used only by maria_chk
+ Returns 0 if record is ok.
+*/
+
+my_bool _ma_rec_check(MARIA_HA *info,const uchar *record, uchar *rec_buff,
+ ulong packed_length, my_bool with_checksum,
+ ha_checksum checksum)
+{
+ uint length,new_length,flag,bit,i;
+ const uchar *pos,*end;
+ uchar *packpos,*to;
+ enum en_fieldtype type;
+ reg3 MARIA_COLUMNDEF *column;
+ DBUG_ENTER("_ma_rec_check");
+
+ packpos=rec_buff; to= rec_buff+info->s->base.pack_bytes;
+ column= info->s->columndef;
+ flag= *packpos; bit=1;
+ record+= info->s->base.null_bytes;
+ to+= info->s->base.null_bytes;
+
+ for (i=info->s->base.fields ; i-- > 0; record+= length, column++)
+ {
+ length=(uint) column->length;
+ if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL)
+ {
+ if (type == FIELD_BLOB)
+ {
+ uint blob_length=
+ _ma_calc_blob_length(length-portable_sizeof_char_ptr,record);
+ if (!blob_length && !(flag & bit))
+ goto err;
+ if (blob_length)
+ to+=length - portable_sizeof_char_ptr+ blob_length;
+ }
+ else if (type == FIELD_SKIP_ZERO)
+ {
+ if (memcmp(record, maria_zero_string, length) == 0)
+ {
+ if (!(flag & bit))
+ goto err;
+ }
+ else
+ to+=length;
+ }
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ pos= record; end= record + length;
+ if (type == FIELD_SKIP_ENDSPACE)
+ { /* Pack trailing spaces */
+ while (end > record && *(end-1) == ' ')
+ end--;
+ }
+ else
+ { /* Pack pre-spaces */
+ while (pos < end && *pos == ' ')
+ pos++;
+ }
+ new_length=(uint) (end-pos);
+ if (new_length + 1 + MY_TEST(column->length > 255 && new_length > 127)
+ < length)
+ {
+ if (!(flag & bit))
+ goto err;
+ if (column->length > 255 && new_length > 127)
+ {
+ /* purecov: begin inspected */
+ if (to[0] != (uchar) ((new_length & 127) + 128) ||
+ to[1] != (uchar) (new_length >> 7))
+ goto err;
+ to+=2;
+ /* purecov: end */
+ }
+ else if (*to++ != (uchar) new_length)
+ goto err;
+ to+=new_length;
+ }
+ else
+ to+=length;
+ }
+ else if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column->length -1);
+ uint tmp_length;
+ if (pack_length == 1)
+ {
+ tmp_length= (uint) *record;
+ to+= 1+ tmp_length;
+ continue;
+ }
+ else
+ {
+ tmp_length= uint2korr(record);
+ to+= get_pack_length(tmp_length)+tmp_length;
+ }
+ continue;
+ }
+ else
+ {
+ to+=length;
+ continue; /* Normal field */
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ flag= *++packpos;
+ bit=1;
+ }
+ }
+ else
+ to+= length;
+ }
+ if (packed_length != (uint) (to - rec_buff) +
+ MY_TEST(info->s->calc_checksum) || (bit != 1 && (flag & ~(bit - 1))))
+ goto err;
+ if (with_checksum && ((uchar) checksum != (uchar) *to))
+ {
+ DBUG_PRINT("error",("wrong checksum for row"));
+ goto err;
+ }
+ DBUG_RETURN(0);
+
+err:
+ DBUG_RETURN(1);
+}
+
+
+/*
+ @brief Unpacks a record
+
+ @return Recordlength
+ @retval >0 ok
+ @retval MY_FILE_ERROR (== -1) Error.
+ my_errno is set to HA_ERR_WRONG_IN_RECORD
+*/
+
+size_t _ma_rec_unpack(register MARIA_HA *info, register uchar *to, uchar *from,
+ size_t found_length)
+{
+ uint flag,bit,length,min_pack_length, column_length;
+ enum en_fieldtype type;
+ uchar *from_end,*to_end,*packpos;
+ reg3 MARIA_COLUMNDEF *column, *end_column;
+ DBUG_ENTER("_ma_rec_unpack");
+
+ to_end=to + info->s->base.reclength;
+ from_end=from+found_length;
+ flag= (uchar) *from; bit=1; packpos=from;
+ if (found_length < info->s->base.min_pack_length)
+ goto err;
+ from+= info->s->base.pack_bytes;
+ min_pack_length= info->s->base.min_pack_length - info->s->base.pack_bytes;
+
+ if ((length= info->s->base.null_bytes))
+ {
+ memcpy(to, from, length);
+ from+= length;
+ to+= length;
+ min_pack_length-= length;
+ }
+
+ for (column= info->s->columndef, end_column= column + info->s->base.fields;
+ column < end_column ; to+= column_length, column++)
+ {
+ column_length= column->length;
+ if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL &&
+ (type != FIELD_CHECK))
+ {
+ if (type == FIELD_VARCHAR)
+ {
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column_length-1);
+ if (pack_length == 1)
+ {
+ length= (uint) *(uchar*) from;
+ if (length > column_length-1)
+ goto err;
+ *to= *from++;
+ }
+ else
+ {
+ get_key_length(length, from);
+ if (length > column_length-2)
+ goto err;
+ int2store(to,length);
+ }
+ if (from+length > from_end)
+ goto err;
+ memcpy(to+pack_length, from, length);
+ MEM_UNDEFINED(to+pack_length + length,
+ column_length - length - pack_length);
+ from+= length;
+ min_pack_length--;
+ continue;
+ }
+ if (flag & bit)
+ {
+ if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO)
+ bzero(to, column_length);
+ else if (type == FIELD_SKIP_ENDSPACE ||
+ type == FIELD_SKIP_PRESPACE)
+ {
+ if (column->length > 255 && *from & 128)
+ {
+ if (from + 1 >= from_end)
+ goto err;
+ length= (*from & 127)+ ((uint) (uchar) *(from+1) << 7); from+=2;
+ }
+ else
+ {
+ if (from == from_end)
+ goto err;
+ length= (uchar) *from++;
+ }
+ min_pack_length--;
+ if (length >= column_length ||
+ min_pack_length + length > (uint) (from_end - from))
+ goto err;
+ if (type == FIELD_SKIP_ENDSPACE)
+ {
+ memcpy(to, from, (size_t) length);
+ bfill(to+length, column_length-length, ' ');
+ }
+ else
+ {
+ bfill(to, column_length-length, ' ');
+ memcpy(to+column_length-length, from, (size_t) length);
+ }
+ from+=length;
+ }
+ }
+ else if (type == FIELD_BLOB)
+ {
+ uint size_length=column_length- portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(size_length,from);
+ ulong from_left= (ulong) (from_end - from);
+ if (from_left < size_length ||
+ from_left - size_length < blob_length ||
+ from_left - size_length - blob_length < min_pack_length)
+ goto err;
+ memcpy(to, from, (size_t) size_length);
+ from+=size_length;
+ memcpy(to+size_length,(uchar*) &from,sizeof(char*));
+ from+=blob_length;
+ }
+ else
+ {
+ if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE)
+ min_pack_length--;
+ if (min_pack_length + column_length > (uint) (from_end - from))
+ goto err;
+ memcpy(to, from, (size_t) column_length); from+=column_length;
+ }
+ if ((bit= bit << 1) >= 256)
+ {
+ flag= (uchar) *++packpos; bit=1;
+ }
+ }
+ else
+ {
+ if (min_pack_length > (uint) (from_end - from))
+ goto err;
+ min_pack_length-=column_length;
+ memcpy(to, from, (size_t) column_length);
+ from+=column_length;
+ }
+ }
+ if (info->s->calc_checksum)
+ info->cur_row.checksum= (uint) (uchar) *from++;
+ if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1))))
+ DBUG_RETURN(found_length);
+
+err:
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ DBUG_PRINT("error",("to_end: %p -> %p from_end: %p -> %p",
+ to, to_end, from, from_end));
+ DBUG_DUMP("from", info->rec_buff, info->s->base.min_pack_length);
+ DBUG_RETURN(MY_FILE_ERROR);
+} /* _ma_rec_unpack */
+
+
+ /* Calc length of blob. Update info in blobs->length */
+
+ulong _ma_calc_total_blob_length(MARIA_HA *info, const uchar *record)
+{
+ ulong length;
+ MARIA_BLOB *blob,*end;
+
+ for (length=0, blob= info->blobs, end=blob+info->s->base.blobs ;
+ blob != end;
+ blob++)
+ {
+ blob->length= _ma_calc_blob_length(blob->pack_length,
+ record + blob->offset);
+ length+=blob->length;
+ }
+ return length;
+}
+
+
+ulong _ma_calc_blob_length(uint length, const uchar *pos)
+{
+ switch (length) {
+ case 1:
+ return (uint) (uchar) *pos;
+ case 2:
+ return (uint) uint2korr(pos);
+ case 3:
+ return uint3korr(pos);
+ case 4:
+ return uint4korr(pos);
+ default:
+ break;
+ }
+ return 0; /* Impossible */
+}
+
+
+void _ma_store_blob_length(uchar *pos,uint pack_length,uint length)
+{
+ switch (pack_length) {
+ case 1:
+ *pos= (uchar) length;
+ break;
+ case 2:
+ int2store(pos,length);
+ break;
+ case 3:
+ int3store(pos,length);
+ break;
+ case 4:
+ int4store(pos,length);
+ default:
+ break;
+ }
+ return;
+}
+
+
+/*
+ Read record from datafile.
+
+ SYNOPSIS
+ _ma_read_dynamic_record()
+ info MARIA_HA pointer to table.
+ filepos From where to read the record.
+ buf Destination for record.
+
+ NOTE
+ If a write buffer is active, it needs to be flushed if its contents
+ intersects with the record to read. We always check if the position
+ of the first uchar of the write buffer is lower than the position
+ past the last uchar to read. In theory this is also true if the write
+ buffer is completely below the read segment. That is, if there is no
+ intersection. But this case is unusual. We flush anyway. Only if the
+ first uchar in the write buffer is above the last uchar to read, we do
+ not flush.
+
+ A dynamic record may need several reads. So this check must be done
+ before every read. Reading a dynamic record starts with reading the
+ block header. If the record does not fit into the free space of the
+ header, the block may be longer than the header. In this case a
+ second read is necessary. These one or two reads repeat for every
+ part of the record.
+
+ RETURN
+ 0 OK
+ # Error number
+*/
+
+int _ma_read_dynamic_record(MARIA_HA *info, uchar *buf,
+ MARIA_RECORD_POS filepos)
+{
+ int block_of_record;
+ uint b_type;
+ MARIA_BLOCK_INFO block_info;
+ File file;
+ uchar *UNINIT_VAR(to);
+ uint UNINIT_VAR(left_length);
+ MARIA_SHARE *share= info->s;
+ myf flag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
+ DBUG_ENTER("_ma_read_dynamic_record");
+
+ if (filepos == HA_OFFSET_ERROR)
+ goto err;
+
+ file= info->dfile.file;
+ block_of_record= 0; /* First block of record is numbered as zero. */
+ block_info.second_read= 0;
+ do
+ {
+ /* A corrupted table can have wrong pointers. (Bug# 19835) */
+ if (filepos == HA_OFFSET_ERROR)
+ goto panic;
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ (info->rec_cache.pos_in_file < filepos +
+ MARIA_BLOCK_INFO_HEADER_LENGTH) &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ info->rec_cache.seek_not_done=1;
+ if ((b_type= _ma_get_block_info(info, &block_info, file, filepos)) &
+ (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
+ my_errno=HA_ERR_RECORD_DELETED;
+ goto err;
+ }
+ if (block_of_record++ == 0) /* First block */
+ {
+ info->cur_row.total_length= block_info.rec_len;
+ if (block_info.rec_len > (uint) share->base.max_pack_length)
+ goto panic;
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ share->base.extra_rec_buff_size, flag))
+ goto err;
+ }
+ to= info->rec_buff;
+ left_length=block_info.rec_len;
+ }
+ if (left_length < block_info.data_len || ! block_info.data_len)
+ goto panic; /* Wrong linked record */
+ /* copy information that is already read */
+ {
+ uint offset= (uint) (block_info.filepos - filepos);
+ uint prefetch_len= (sizeof(block_info.header) - offset);
+ filepos+= sizeof(block_info.header);
+
+ if (prefetch_len > block_info.data_len)
+ prefetch_len= block_info.data_len;
+ if (prefetch_len)
+ {
+ memcpy(to, block_info.header + offset, prefetch_len);
+ block_info.data_len-= prefetch_len;
+ left_length-= prefetch_len;
+ to+= prefetch_len;
+ }
+ }
+ /* read rest of record from file */
+ if (block_info.data_len)
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + block_info.data_len &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ /*
+ What a pity that this method is not called 'file_pread' and that
+ there is no equivalent without seeking. We are at the right
+ position already. :(
+ */
+ if (share->file_read(info, to, block_info.data_len,
+ filepos, MYF(MY_NABP)))
+ goto panic;
+ left_length-=block_info.data_len;
+ to+=block_info.data_len;
+ }
+ filepos= block_info.next_filepos;
+ } while (left_length);
+
+ info->update|= HA_STATE_AKTIV; /* We have a aktive record */
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) !=
+ MY_FILE_ERROR ? 0 : my_errno);
+
+err:
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(my_errno);
+
+panic:
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ goto err;
+}
+
+ /* compare unique constraint between stored rows */
+
+my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
+ const uchar *record, MARIA_RECORD_POS pos)
+{
+ uchar *old_rec_buff,*old_record;
+ size_t old_rec_buff_size;
+ my_bool error, buff_alloced;
+ DBUG_ENTER("_ma_cmp_dynamic_unique");
+
+ alloc_on_stack(*info->stack_end_ptr, old_record, buff_alloced,
+ info->s->base.reclength);
+ if (!old_record)
+ DBUG_RETURN(1);
+
+ /* Don't let the compare destroy blobs that may be in use */
+ old_rec_buff= info->rec_buff;
+ old_rec_buff_size= info->rec_buff_size;
+
+ if (info->s->base.blobs)
+ {
+ info->rec_buff= 0;
+ info->rec_buff_size= 0;
+ }
+ error= _ma_read_dynamic_record(info, old_record, pos) != 0;
+ if (!error)
+ error=_ma_unique_comp(def, record, old_record, def->null_are_equal) != 0;
+ if (info->s->base.blobs)
+ {
+ my_free(info->rec_buff);
+ info->rec_buff= old_rec_buff;
+ info->rec_buff_size= old_rec_buff_size;
+ }
+ stack_alloc_free(old_record, buff_alloced);
+ DBUG_RETURN(error);
+}
+
+
+ /* Compare of record on disk with packed record in memory */
+
+my_bool _ma_cmp_dynamic_record(register MARIA_HA *info,
+ register const uchar *record)
+{
+ uint flag, reclength, b_type,cmp_length;
+ my_off_t filepos;
+ uchar *buffer;
+ MARIA_BLOCK_INFO block_info;
+ my_bool error= 1, buff_alloced= 0;
+ size_t UNINIT_VAR(buffer_length);
+ DBUG_ENTER("_ma_cmp_dynamic_record");
+
+ if (info->opt_flag & WRITE_CACHE_USED)
+ {
+ info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK);
+ if (flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(1);
+ }
+ info->rec_cache.seek_not_done=1;
+
+ /* If nobody have touched the database we don't have to test rec */
+
+ buffer=info->rec_buff;
+ if ((info->opt_flag & READ_CHECK_USED))
+ { /* If check isn't disabled */
+ if (info->s->base.blobs)
+ {
+ buffer_length= (info->s->base.pack_reclength +
+ _ma_calc_total_blob_length(info,record));
+
+ alloc_on_stack(*info->stack_end_ptr, buffer, buff_alloced, buffer_length);
+ if (!buffer)
+ DBUG_RETURN(1);
+ }
+ if (!(reclength= _ma_rec_pack(info,buffer,record)))
+ goto err;
+
+ record= buffer;
+
+ filepos= info->cur_row.lastpos;
+ flag=block_info.second_read=0;
+ block_info.next_filepos=filepos;
+ while (reclength > 0)
+ {
+ if ((b_type= _ma_get_block_info(info, &block_info, info->dfile.file,
+ block_info.next_filepos))
+ & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED))
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ if (flag == 0) /* First block */
+ {
+ flag=1;
+ if (reclength != block_info.rec_len)
+ {
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ } else if (reclength < block_info.data_len)
+ {
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ goto err;
+ }
+ reclength-= block_info.data_len;
+ cmp_length= block_info.data_len;
+ if (!reclength && info->s->calc_checksum)
+ cmp_length--; /* 'record' may not contain checksum */
+
+ if (_ma_cmp_buffer(info->dfile.file, record, block_info.filepos,
+ cmp_length))
+ {
+ my_errno=HA_ERR_RECORD_CHANGED;
+ goto err;
+ }
+ flag=1;
+ record+=block_info.data_len;
+ }
+ }
+ my_errno=0;
+ error= 0;
+err:
+ stack_alloc_free(buffer, buff_alloced);
+ DBUG_PRINT("exit", ("result: %d", error));
+ DBUG_RETURN(error);
+}
+
+
+ /* Compare file to buffert */
+
+static my_bool _ma_cmp_buffer(File file, const uchar *buff, my_off_t filepos,
+ uint length)
+{
+ uint next_length;
+ uchar temp_buff[IO_SIZE*2];
+ DBUG_ENTER("_ma_cmp_buffer");
+
+ next_length= IO_SIZE*2 - (uint) (filepos & (IO_SIZE-1));
+
+ while (length > IO_SIZE*2)
+ {
+ if (mysql_file_pread(file,temp_buff,next_length,filepos, MYF(MY_NABP)) ||
+ memcmp(buff, temp_buff, next_length))
+ goto err;
+ filepos+=next_length;
+ buff+=next_length;
+ length-= next_length;
+ next_length=IO_SIZE*2;
+ }
+ if (mysql_file_pread(file,temp_buff,length,filepos,MYF(MY_NABP)))
+ goto err;
+ DBUG_RETURN(memcmp(buff, temp_buff, length) != 0);
+err:
+ DBUG_RETURN(1);
+}
+
+
+/*
+ Read next record from datafile during table scan.
+
+ SYNOPSIS
+ _ma_read_rnd_dynamic_record()
+ info MARIA_HA pointer to table.
+ buf Destination for record.
+ filepos From where to read the record.
+ skip_deleted_blocks If to repeat reading until a non-deleted
+ record is found.
+
+ NOTE
+ This is identical to _ma_read_dynamic_record(), except the following
+ cases:
+
+ - If there is no active row at 'filepos', continue scanning for
+ an active row. (This is becasue the previous
+ _ma_read_rnd_dynamic_record() call stored the next block position
+ in filepos, but this position may not be a start block for a row
+ - We may have READ_CACHING enabled, in which case we use the cache
+ to read rows.
+
+ For other comments, check _ma_read_dynamic_record()
+
+ RETURN
+ 0 OK
+ != 0 Error number
+*/
+
+int _ma_read_rnd_dynamic_record(MARIA_HA *info,
+ uchar *buf,
+ MARIA_RECORD_POS filepos,
+ my_bool skip_deleted_blocks)
+{
+ int block_of_record;
+#ifdef MARIA_EXTERNAL_LOCKING
+ int info_read;
+#endif
+ uint left_len,b_type;
+ uchar *UNINIT_VAR(to);
+ MARIA_BLOCK_INFO block_info;
+ MARIA_SHARE *share= info->s;
+ myf flag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
+ DBUG_ENTER("_ma_read_rnd_dynamic_record");
+
+#ifdef MARIA_EXTERNAL_LOCKING
+ info_read=0;
+#endif
+
+ if (info->lock_type == F_UNLCK)
+ {
+#ifndef UNSAFE_LOCKING
+#else
+ info->tmp_lock_type=F_RDLCK;
+#endif
+ }
+#ifdef MARIA_EXTERNAL_LOCKING
+ else
+ info_read=1; /* memory-keyinfoblock is ok */
+#endif
+
+ block_of_record= 0; /* First block of record is numbered as zero. */
+ block_info.second_read= 0;
+ left_len=1;
+ do
+ {
+ if (filepos >= info->state->data_file_length)
+ {
+#ifdef MARIA_EXTERNAL_LOCKING
+ if (!info_read)
+ { /* Check if changed */
+ info_read=1;
+ info->rec_cache.seek_not_done=1;
+ if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
+ goto panic;
+ }
+ if (filepos >= info->state->data_file_length)
+ {
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+ }
+#else
+ my_errno= HA_ERR_END_OF_FILE;
+ goto err;
+#endif
+ }
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(info, &info->rec_cache, block_info.header, filepos,
+ sizeof(block_info.header),
+ (!block_of_record && skip_deleted_blocks ?
+ READING_NEXT : 0) | READING_HEADER))
+ goto panic;
+ b_type= _ma_get_block_info(info, &block_info,-1,filepos);
+ }
+ else
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH &&
+ flush_io_cache(&info->rec_cache))
+ DBUG_RETURN(my_errno);
+ info->rec_cache.seek_not_done=1;
+ b_type= _ma_get_block_info(info, &block_info, info->dfile.file, filepos);
+ }
+
+ if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
+ BLOCK_FATAL_ERROR))
+ {
+ if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ && skip_deleted_blocks)
+ {
+ filepos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue; /* Search after next_record */
+ }
+ if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ {
+ my_errno= HA_ERR_RECORD_DELETED;
+ info->cur_row.lastpos= block_info.filepos;
+ info->cur_row.nextpos= block_info.filepos+block_info.block_len;
+ }
+ goto err;
+ }
+ if (block_of_record == 0) /* First block */
+ {
+ info->cur_row.total_length= block_info.rec_len;
+ if (block_info.rec_len > (uint) share->base.max_pack_length)
+ goto panic;
+ info->cur_row.lastpos= filepos;
+ if (share->base.blobs)
+ {
+ if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
+ block_info.rec_len +
+ share->base.extra_rec_buff_size, flag))
+ goto err;
+ }
+ to= info->rec_buff;
+ left_len=block_info.rec_len;
+ }
+ if (left_len < block_info.data_len)
+ goto panic; /* Wrong linked record */
+
+ /* copy information that is already read */
+ {
+ uint offset=(uint) (block_info.filepos - filepos);
+ uint tmp_length= (sizeof(block_info.header) - offset);
+ filepos=block_info.filepos;
+
+ if (tmp_length > block_info.data_len)
+ tmp_length= block_info.data_len;
+ if (tmp_length)
+ {
+ memcpy(to, block_info.header+offset, tmp_length);
+ block_info.data_len-=tmp_length;
+ left_len-=tmp_length;
+ to+=tmp_length;
+ filepos+=tmp_length;
+ }
+ }
+ /* read rest of record from file */
+ if (block_info.data_len)
+ {
+ if (info->opt_flag & READ_CACHE_USED)
+ {
+ if (_ma_read_cache(info, &info->rec_cache, to,filepos,
+ block_info.data_len,
+ (!block_of_record && skip_deleted_blocks) ?
+ READING_NEXT : 0))
+ goto panic;
+ }
+ else
+ {
+ if (info->opt_flag & WRITE_CACHE_USED &&
+ info->rec_cache.pos_in_file <
+ block_info.filepos + block_info.data_len &&
+ flush_io_cache(&info->rec_cache))
+ goto err;
+ /* VOID(my_seek(info->dfile.file, filepos, MY_SEEK_SET, MYF(0))); */
+ if (mysql_file_read(info->dfile.file, to, block_info.data_len, MYF(MY_NABP)))
+ {
+ if (my_errno == HA_ERR_FILE_TOO_SHORT)
+ {
+ /* Unexpected end of file */
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+ }
+ goto err;
+ }
+ }
+ }
+ /*
+ Increment block-of-record counter. If it was the first block,
+ remember the position behind the block for the next call.
+ */
+ if (block_of_record++ == 0)
+ {
+ info->cur_row.nextpos= block_info.filepos+block_info.block_len;
+ skip_deleted_blocks=0;
+ }
+ left_len-=block_info.data_len;
+ to+=block_info.data_len;
+ filepos=block_info.next_filepos;
+ } while (left_len);
+
+ info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED;
+ fast_ma_writeinfo(info);
+ if (_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) !=
+ MY_FILE_ERROR)
+ DBUG_RETURN(0);
+ DBUG_RETURN(my_errno); /* Wrong record */
+
+panic:
+ /* Something is fatal wrong */
+ _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
+err:
+ fast_ma_writeinfo(info);
+ DBUG_RETURN(my_errno);
+}
+
+
+ /* Read and process header from a dynamic-record-file */
+
+uint _ma_get_block_info(MARIA_HA *handler, MARIA_BLOCK_INFO *info, File file,
+ my_off_t filepos)
+{
+ uint return_val=0;
+ uchar *header=info->header;
+
+ if (file >= 0)
+ {
+ /*
+ We do not use my_pread() here because we want to have the file
+ pointer set to the end of the header after this function.
+ my_pread() may leave the file pointer untouched.
+ */
+ mysql_file_seek(file,filepos,MY_SEEK_SET,MYF(0));
+ if (mysql_file_read(file, header, sizeof(info->header),MYF(0)) !=
+ sizeof(info->header))
+ {
+ /*
+ This is either an error or just reading at end of file.
+ Don't give a fatal error for this case.
+ */
+ my_errno= HA_ERR_WRONG_IN_RECORD;
+ return BLOCK_ERROR;
+ }
+ }
+ DBUG_DUMP("header",header,MARIA_BLOCK_INFO_HEADER_LENGTH);
+ if (info->second_read)
+ {
+ if (info->header[0] <= 6 || info->header[0] == 13)
+ return_val=BLOCK_SYNC_ERROR;
+ }
+ else
+ {
+ if (info->header[0] > 6 && info->header[0] != 13)
+ return_val=BLOCK_SYNC_ERROR;
+ }
+ info->next_filepos= HA_OFFSET_ERROR; /* Dummy if no next block */
+
+ switch (info->header[0]) {
+ case 0:
+ if ((info->block_len=(uint) mi_uint3korr(header+1)) <
+ MARIA_MIN_BLOCK_LENGTH ||
+ (info->block_len & (MARIA_DYN_ALIGN_SIZE -1)))
+ goto err;
+ info->filepos=filepos;
+ info->next_filepos=mi_sizekorr(header+4);
+ info->prev_filepos=mi_sizekorr(header+12);
+#if SIZEOF_OFF_T == 4
+ if ((mi_uint4korr(header+4) != 0 &&
+ (mi_uint4korr(header+4) != (ulong) ~0 ||
+ info->next_filepos != (ulong) ~0)) ||
+ (mi_uint4korr(header+12) != 0 &&
+ (mi_uint4korr(header+12) != (ulong) ~0 ||
+ info->prev_filepos != (ulong) ~0)))
+ goto err;
+#endif
+ return return_val | BLOCK_DELETED; /* Deleted block */
+
+ case 1:
+ info->rec_len=info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->filepos=filepos+3;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+ case 2:
+ info->rec_len=info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->filepos=filepos+4;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+
+ case 13:
+ info->rec_len=mi_uint4korr(header+1);
+ info->block_len=info->data_len=mi_uint3korr(header+5);
+ info->next_filepos=mi_sizekorr(header+8);
+ info->second_read=1;
+ info->filepos=filepos+16;
+ return return_val | BLOCK_FIRST;
+
+ case 3:
+ info->rec_len=info->data_len=mi_uint2korr(header+1);
+ info->block_len=info->rec_len+ (uint) header[3];
+ info->filepos=filepos+4;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+ case 4:
+ info->rec_len=info->data_len=mi_uint3korr(header+1);
+ info->block_len=info->rec_len+ (uint) header[4];
+ info->filepos=filepos+5;
+ return return_val | BLOCK_FIRST | BLOCK_LAST;
+
+ case 5:
+ info->rec_len=mi_uint2korr(header+1);
+ info->block_len=info->data_len=mi_uint2korr(header+3);
+ info->next_filepos=mi_sizekorr(header+5);
+ info->second_read=1;
+ info->filepos=filepos+13;
+ return return_val | BLOCK_FIRST;
+ case 6:
+ info->rec_len=mi_uint3korr(header+1);
+ info->block_len=info->data_len=mi_uint3korr(header+4);
+ info->next_filepos=mi_sizekorr(header+7);
+ info->second_read=1;
+ info->filepos=filepos+15;
+ return return_val | BLOCK_FIRST;
+
+ /* The following blocks are identical to 1-6 without rec_len */
+ case 7:
+ info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->filepos=filepos+3;
+ return return_val | BLOCK_LAST;
+ case 8:
+ info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->filepos=filepos+4;
+ return return_val | BLOCK_LAST;
+
+ case 9:
+ info->data_len=mi_uint2korr(header+1);
+ info->block_len=info->data_len+ (uint) header[3];
+ info->filepos=filepos+4;
+ return return_val | BLOCK_LAST;
+ case 10:
+ info->data_len=mi_uint3korr(header+1);
+ info->block_len=info->data_len+ (uint) header[4];
+ info->filepos=filepos+5;
+ return return_val | BLOCK_LAST;
+
+ case 11:
+ info->data_len=info->block_len=mi_uint2korr(header+1);
+ info->next_filepos=mi_sizekorr(header+3);
+ info->second_read=1;
+ info->filepos=filepos+11;
+ return return_val;
+ case 12:
+ info->data_len=info->block_len=mi_uint3korr(header+1);
+ info->next_filepos=mi_sizekorr(header+4);
+ info->second_read=1;
+ info->filepos=filepos+12;
+ return return_val;
+ }
+
+err:
+ if (!handler->in_check_table)
+ {
+ /* We may be scanning the table for new rows; Don't give an error */
+ _ma_set_fatal_error(handler, HA_ERR_WRONG_IN_RECORD);
+ }
+ return BLOCK_ERROR;
+}