/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB Copyright (c) 2020, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ /* Functions to handle keys */ #include "maria_def.h" #include "m_ctype.h" #include "ma_sp_defs.h" #include "ma_blockrec.h" /* For ROW_FLAG_TRANSID */ #include "trnman.h" #ifdef HAVE_IEEEFP_H #include #endif #define CHECK_KEYS /* Enable safety checks */ static int _ma_put_key_in_record(MARIA_HA *info, uint keynr, my_bool unpack_blobs, uchar *record); #define FIX_LENGTH(cs, pos, length, char_length) \ do { \ if (length > char_length) \ char_length= (uint) my_ci_charpos(cs, (const char *) pos, \ (const char *) pos+length, \ char_length); \ set_if_smaller(char_length,length); \ } while(0) /** Store trid in a packed format as part of a key @fn transid_store_packed @param info Maria handler @param to End of key to which we should store a packed transid @param trid Trid to be stored @notes Keys that have a transid has the lowest bit set for the last byte of the key This function sets this bit for the key. Trid is max 6 bytes long First Trid it's converted to a smaller number by using trid= trid - create_trid. Then trid is then shifted up one bit so that we can use the lowest bit as a marker if it's followed by another trid. Trid is then stored as follows: if trid < 256-12 one byte else one byte prefix length_of_trid_in_bytes + 249 followed by data in high-byte-first order Prefix bytes 244 to 249 are reserved for negative transid, that can be used when we pack transid relative to each other on a key block. We have to store transid in high-byte-first order so that we can compare them unpacked byte per byte and as soon we find a difference we know which is smaller. For example, assuming we the following data: key_data: 1 (4 byte integer) pointer_to_row: 2 << 8 + 3 = 515 (page 2, row 3) table_create_transid 1000 Defined at create table time and stored in table definition transid 1010 Transaction that created row delete_transid 2011 Transaction that deleted row In addition we assume the table is created with a data pointer length of 4 bytes (this is automatically calculated based on the medium length of rows and the given max number of rows) The binary data for the key would then look like this in hex: 00 00 00 01 Key data (1 stored high byte first) 00 00 00 47 (515 << 1) + 1 ; The last 1 is marker that key cont. 15 ((1010-1000) << 1) + 1 ; The last 1 is marker that key cont. FB 07 E6 Length byte (FE = 249 + 2 means 2 bytes) and ((2011 - 1000) << 1) = 07 E6 */ uint transid_store_packed(MARIA_HA *info, uchar *to, ulonglong trid) { uchar *start; uint length; uchar buff[8]; DBUG_ASSERT(trid < (1LL << (MARIA_MAX_PACK_TRANSID_SIZE*8))); DBUG_ASSERT(trid >= info->s->state.create_trid); trid= (trid - info->s->state.create_trid) << 1; /* Mark that key contains transid */ to[-1]|= 1; if (trid < MARIA_MIN_TRANSID_PACK_OFFSET) { to[0]= (uchar) trid; return 1; } start= to; /* store things in low-byte-first-order in buff */ to= buff; do { *to++= (uchar) trid; trid= trid>>8; } while (trid); length= (uint) (to - buff); /* Store length prefix */ start[0]= (uchar) (length + MARIA_TRANSID_PACK_OFFSET); start++; /* Copy things in high-byte-first order to output buffer */ do { *start++= *--to; } while (to != buff); return length+1; } /** Read packed transid @fn transid_get_packed @param info Maria handler @param from Transid is stored here See transid_store_packed() for how transid is packed */ ulonglong transid_get_packed(MARIA_SHARE *share, const uchar *from) { ulonglong value; uint length; if (from[0] < MARIA_MIN_TRANSID_PACK_OFFSET) value= (ulonglong) from[0]; else { value= 0; for (length= (uint) (from[0] - MARIA_TRANSID_PACK_OFFSET), value= (ulonglong) from[1], from+=2; --length ; from++) value= (value << 8) + ((ulonglong) *from); } return (value >> 1) + share->state.create_trid; } /* Make a normal (not spatial or fulltext) intern key from a record SYNOPSIS _ma_make_key() info MyiSAM handler int_key Store created key here keynr key number key Buffer used to store key data record Record filepos Position to record in the data file NOTES This is used to generate keys from the record on insert, update and delete RETURN key */ MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr, uchar *key, const uchar *record, MARIA_RECORD_POS filepos, ulonglong trid) { const uchar *pos; reg1 HA_KEYSEG *keyseg; my_bool is_ft; DBUG_ENTER("_ma_make_key"); int_key->data= key; int_key->flag= 0; /* Always return full key */ int_key->keyinfo= info->s->keyinfo + keynr; is_ft= int_key->keyinfo->flag & HA_FULLTEXT; for (keyseg= int_key->keyinfo->seg ; keyseg->type ;keyseg++) { enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; uint length=keyseg->length; uint char_length; CHARSET_INFO *cs=keyseg->charset; if (keyseg->null_bit) { if (record[keyseg->null_pos] & keyseg->null_bit) { *key++= 0; /* NULL in key */ continue; } *key++=1; /* Not NULL */ } char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : length); pos= record+keyseg->start; if (type == HA_KEYTYPE_BIT) { if (keyseg->bit_length) { uchar bits= get_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, keyseg->bit_length); *key++= (char) bits; length--; } memcpy(key, pos, length); key+= length; continue; } if (keyseg->flag & HA_SPACE_PACK) { if (type != HA_KEYTYPE_NUM) { length= (uint) my_ci_lengthsp(cs, (const char*)pos, length); } else { const uchar *end= pos + length; while (pos < end && pos[0] == ' ') pos++; length= (uint) (end-pos); } FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); memcpy(key, pos, (size_t) char_length); key+=char_length; continue; } if (keyseg->flag & HA_VAR_LENGTH_PART) { uint pack_length= (keyseg->bit_start == 1 ? 1 : 2); uint tmp_length= (pack_length == 1 ? (uint) *pos : uint2korr(pos)); pos+= pack_length; /* Skip VARCHAR length */ set_if_smaller(length,tmp_length); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); memcpy(key,pos,(size_t) char_length); key+= char_length; continue; } else if (keyseg->flag & HA_BLOB_PART) { uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); uchar *blob_pos; memcpy(&blob_pos, pos+keyseg->bit_start,sizeof(char*)); set_if_smaller(length,tmp_length); FIX_LENGTH(cs, blob_pos, length, char_length); store_key_length_inc(key,char_length); memcpy(key, blob_pos, (size_t) char_length); key+= char_length; continue; } else if (keyseg->flag & HA_SWAP_KEY) { /* Numerical column */ if (type == HA_KEYTYPE_FLOAT) { float nr; float4get(nr,pos); if (isnan(nr)) { /* Replace NAN with zero */ bzero(key,length); key+=length; continue; } } else if (type == HA_KEYTYPE_DOUBLE) { double nr; float8get(nr,pos); if (isnan(nr)) { bzero(key,length); key+=length; continue; } } pos+=length; while (length--) { *key++ = *--pos; } continue; } FIX_LENGTH(cs, pos, length, char_length); memcpy(key, pos, char_length); if (length > char_length) my_ci_fill(cs, (char*) key+char_length, length-char_length, ' '); key+= length; } _ma_dpointer(info->s, key, filepos); int_key->data_length= (uint)(key - int_key->data); int_key->ref_length= info->s->rec_reflength; int_key->flag= 0; if (_ma_have_versioning(info) && trid) { int_key->ref_length+= transid_store_packed(info, key + int_key->ref_length, (TrID) trid); int_key->flag|= SEARCH_USER_KEY_HAS_TRANSID; } DBUG_PRINT("exit",("keynr: %d",keynr)); DBUG_DUMP_KEY("key", int_key); DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, int_key);); DBUG_RETURN(int_key); } /* _ma_make_key */ /* Pack a key to intern format from given format (c_rkey) SYNOPSIS _ma_pack_key() info MARIA handler int_key Store key here keynr key number key Buffer for key data old Original not packed key keypart_map bitmap of used keyparts last_used_keyseg out parameter. May be NULL RETURN int_key last_use_keyseg Store pointer to the keyseg after the last used one */ MARIA_KEY *_ma_pack_key(register MARIA_HA *info, MARIA_KEY *int_key, uint keynr, uchar *key, const uchar *old, key_part_map keypart_map, HA_KEYSEG **last_used_keyseg) { HA_KEYSEG *keyseg; my_bool is_ft; DBUG_ENTER("_ma_pack_key"); int_key->data= key; int_key->keyinfo= info->s->keyinfo + keynr; /* "one part" rtree key is 2*SPDIMS part key in Maria */ if (int_key->keyinfo->key_alg == HA_KEY_ALG_RTREE) keypart_map= (((key_part_map)1) << (2*SPDIMS)) - 1; /* only key prefixes are supported */ DBUG_ASSERT(((keypart_map+1) & keypart_map) == 0); is_ft= int_key->keyinfo->flag & HA_FULLTEXT; for (keyseg=int_key->keyinfo->seg ; keyseg->type && keypart_map; old+= keyseg->length, keyseg++) { enum ha_base_keytype type= (enum ha_base_keytype) keyseg->type; uint length= keyseg->length; uint char_length; const uchar *pos; CHARSET_INFO *cs=keyseg->charset; keypart_map>>= 1; if (keyseg->null_bit) { if (!(*key++= (char) 1-*old++)) /* Copy null marker */ { if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) old+= 2; continue; /* Found NULL */ } } char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : length); pos= old; if (keyseg->flag & HA_SPACE_PACK) { const uchar *end= pos + length; if (type == HA_KEYTYPE_NUM) { while (pos < end && pos[0] == ' ') pos++; } else if (type != HA_KEYTYPE_BINARY) { while (end > pos && end[-1] == ' ') end--; } length=(uint) (end-pos); FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); memcpy(key,pos,(size_t) char_length); key+= char_length; continue; } else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) { /* Length of key-part used with maria_rkey() always 2 */ uint tmp_length=uint2korr(pos); pos+=2; set_if_smaller(length,tmp_length); /* Safety */ FIX_LENGTH(cs, pos, length, char_length); store_key_length_inc(key,char_length); old+=2; /* Skip length */ memcpy(key, pos,(size_t) char_length); key+= char_length; continue; } else if (keyseg->flag & HA_SWAP_KEY) { /* Numerical column */ pos+=length; while (length--) *key++ = *--pos; continue; } FIX_LENGTH(cs, pos, length, char_length); memcpy(key, pos, char_length); if (length > char_length) my_ci_fill(cs, (char*) key+char_length, length-char_length, ' '); key+= length; } if (last_used_keyseg) *last_used_keyseg= keyseg; /* set flag to SEARCH_PART_KEY if we are not using all key parts */ int_key->flag= keyseg->type ? SEARCH_PART_KEY : 0; int_key->ref_length= 0; int_key->data_length= (uint)(key - int_key->data); DBUG_PRINT("exit", ("length: %u", int_key->data_length)); DBUG_RETURN(int_key); } /* _ma_pack_key */ /** Copy a key */ void _ma_copy_key(MARIA_KEY *to, const MARIA_KEY *from) { memcpy(to->data, from->data, from->data_length + from->ref_length); to->keyinfo= from->keyinfo; to->data_length= from->data_length; to->ref_length= from->ref_length; to->flag= from->flag; } /* Store found key in record SYNOPSIS _ma_put_key_in_record() info MARIA handler keynr Key number that was used unpack_blobs TRUE <=> Unpack blob columns FALSE <=> Skip them. This is used by index condition pushdown check function record Store key here Last read key is in info->lastkey NOTES Used when only-keyread is wanted RETURN 0 ok 1 error */ static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, my_bool unpack_blobs, uchar *record) { reg2 uchar *key; uchar *pos,*key_end; reg1 HA_KEYSEG *keyseg; uchar *blob_ptr; DBUG_ENTER("_ma_put_key_in_record"); blob_ptr= info->lastkey_buff2; /* Place to put blob parts */ key= info->last_key.data; /* Key that was read */ key_end= key + info->last_key.data_length; for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) { if (keyseg->null_bit) { if (!*key++) { record[keyseg->null_pos]|= keyseg->null_bit; continue; } record[keyseg->null_pos]&= ~keyseg->null_bit; } if (keyseg->type == HA_KEYTYPE_BIT) { uint length= keyseg->length; if (keyseg->bit_length) { uchar bits= *key++; set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start, keyseg->bit_length); length--; } else { clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, keyseg->bit_length); } memcpy(record + keyseg->start, key, length); key+= length; continue; } if (keyseg->flag & HA_SPACE_PACK) { uint length; get_key_length(length,key); #ifdef CHECK_KEYS if (length > keyseg->length || key+length > key_end) goto err; #endif pos= record+keyseg->start; if (keyseg->type != (int) HA_KEYTYPE_NUM) { memcpy(pos,key,(size_t) length); my_ci_fill(keyseg->charset, (char*) pos + length, keyseg->length - length, ' '); } else { bfill(pos,keyseg->length-length,' '); memcpy(pos+keyseg->length-length,key,(size_t) length); } key+=length; continue; } if (keyseg->flag & HA_VAR_LENGTH_PART) { uint length; get_key_length(length,key); #ifdef CHECK_KEYS if (length > keyseg->length || key+length > key_end) goto err; #endif /* Store key length */ if (keyseg->bit_start == 1) *(uchar*) (record+keyseg->start)= (uchar) length; else int2store(record+keyseg->start, length); /* And key data */ memcpy(record+keyseg->start + keyseg->bit_start, key, length); key+= length; } else if (keyseg->flag & HA_BLOB_PART) { uint length; get_key_length(length,key); #ifdef CHECK_KEYS if (length > keyseg->length || key+length > key_end) goto err; #endif if (unpack_blobs) { memcpy(record+keyseg->start+keyseg->bit_start, &blob_ptr, sizeof(char*)); memcpy(blob_ptr,key,length); blob_ptr+=length; /* The above changed info->lastkey2. Inform maria_rnext_same(). */ info->update&= ~HA_STATE_RNEXT_SAME; _ma_store_blob_length(record+keyseg->start, (uint) keyseg->bit_start,length); } key+=length; } else if (keyseg->flag & HA_SWAP_KEY) { uchar *to= record+keyseg->start+keyseg->length; uchar *end= key+keyseg->length; #ifdef CHECK_KEYS if (end > key_end) goto err; #endif do { *--to= *key++; } while (key != end); continue; } else { #ifdef CHECK_KEYS if (key+keyseg->length > key_end) goto err; #endif memcpy(record+keyseg->start, key, (size_t) keyseg->length); key+= keyseg->length; } } DBUG_RETURN(0); err: DBUG_PRINT("info",("error")); DBUG_RETURN(1); /* Crashed row */ } /* _ma_put_key_in_record */ /* Here when key reads are used */ int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS filepos) { fast_ma_writeinfo(info); if (filepos != HA_OFFSET_ERROR) { if (info->lastinx >= 0) { /* Read only key */ if (_ma_put_key_in_record(info, (uint)info->lastinx, TRUE, buf)) { _ma_set_fatal_error(info, HA_ERR_CRASHED); return -1; } info->update|= HA_STATE_AKTIV; /* We should find a record */ return 0; } my_errno=HA_ERR_WRONG_INDEX; } return(-1); /* Wrong data to read */ } /* Save current key tuple to record and call index condition check function SYNOPSIS ma_check_index_cond() info MyISAM handler keynr Index we're running a scan on record Record buffer to use (it is assumed that index check function will look for column values there) RETURN CHECK_ERROR Error ; my_errno set to HA_ERR_CRASHED CHECK_NEG Index condition is not satisfied, continue scanning CHECK_POS Index condition is satisfied CHECK_OUT_OF_RANGE Index condition is not satisfied, end the scan. my_errno set to HA_ERR_END_OF_FILE info->cur_row.lastpos is set to HA_OFFSET_ERROR in case of CHECK_ERROR or CHECK_OUT_OF_RANGE to indicate that we don't have any active row. */ check_result_t ma_check_index_cond_real(register MARIA_HA *info, uint keynr, uchar *record) { check_result_t res= CHECK_POS; DBUG_ASSERT(info->index_cond_func || info->rowid_filter_func); if (_ma_put_key_in_record(info, keynr, FALSE, record)) { /* Impossible case; Can only happen if bug in code */ _ma_print_error(info, HA_ERR_CRASHED, 0); info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */ my_errno= HA_ERR_CRASHED; return CHECK_ERROR; } if (info->index_cond_func) { if ((res= info->index_cond_func(info->index_cond_func_arg)) == CHECK_OUT_OF_RANGE) { /* We got beyond the end of scanned range */ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */ my_errno= HA_ERR_END_OF_FILE; return res; } /* If we got an error, out-of-range condition, or ICP condition computed to FALSE - we don't need to check the Rowid Filter. */ if (res != CHECK_POS) return res; } /* Check the Rowid Filter, if present */ if (info->rowid_filter_func) { if ((res= info->rowid_filter_func(info->rowid_filter_func_arg)) == CHECK_OUT_OF_RANGE) { /* We got beyond the end of scanned range */ info->cur_row.lastpos= HA_OFFSET_ERROR; /* No active record */ my_errno= HA_ERR_END_OF_FILE; } } return res; } /* Retrieve auto_increment info SYNOPSIS retrieve_auto_increment() key Auto-increment key key_type Key's type NOTE 'key' should in "record" format, that is, how it is packed in a record (this matters with HA_SWAP_KEY). IMPLEMENTATION For signed columns we don't retrieve the auto increment value if it's less than zero. */ ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type) { ulonglong value= 0; /* Store unsigned values here */ longlong s_value= 0; /* Store signed values here */ switch (key_type) { case HA_KEYTYPE_INT8: s_value= (longlong) *(const signed char*) key; break; case HA_KEYTYPE_BINARY: value=(ulonglong) *key; break; case HA_KEYTYPE_SHORT_INT: s_value= (longlong) sint2korr(key); break; case HA_KEYTYPE_USHORT_INT: value=(ulonglong) uint2korr(key); break; case HA_KEYTYPE_LONG_INT: s_value= (longlong) sint4korr(key); break; case HA_KEYTYPE_ULONG_INT: value=(ulonglong) uint4korr(key); break; case HA_KEYTYPE_INT24: s_value= (longlong) sint3korr(key); break; case HA_KEYTYPE_UINT24: value=(ulonglong) uint3korr(key); break; case HA_KEYTYPE_FLOAT: /* This shouldn't be used */ { float f_1; float4get(f_1,key); /* Ignore negative values */ value = (f_1 < (float) 0.0) ? 0 : (ulonglong) f_1; break; } case HA_KEYTYPE_DOUBLE: /* This shouldn't be used */ { double f_1; float8get(f_1,key); /* Ignore negative values */ value = (f_1 < 0.0) ? 0 : (ulonglong) f_1; break; } case HA_KEYTYPE_LONGLONG: s_value= sint8korr(key); break; case HA_KEYTYPE_ULONGLONG: value= uint8korr(key); break; default: DBUG_ASSERT(0); value=0; /* Error */ break; } /* The following code works becasue if s_value < 0 then value is 0 and if s_value == 0 then value will contain either s_value or the correct value. */ return (s_value > 0) ? (ulonglong) s_value : value; }